diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index f9e3ca9ce1ef..c77b36ac81b8 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -38,6 +38,22 @@ # xargs -n1 | sort | uniq -d; # done +# 20120415: new clang import which bumps version from 3.0 to 3.1 +OLD_FILES+=usr/include/clang/3.0/altivec.h +OLD_FILES+=usr/include/clang/3.0/avxintrin.h +OLD_FILES+=usr/include/clang/3.0/emmintrin.h +OLD_FILES+=usr/include/clang/3.0/immintrin.h +OLD_FILES+=usr/include/clang/3.0/mm3dnow.h +OLD_FILES+=usr/include/clang/3.0/mm_malloc.h +OLD_FILES+=usr/include/clang/3.0/mmintrin.h +OLD_FILES+=usr/include/clang/3.0/nmmintrin.h +OLD_FILES+=usr/include/clang/3.0/pmmintrin.h +OLD_FILES+=usr/include/clang/3.0/smmintrin.h +OLD_FILES+=usr/include/clang/3.0/tmmintrin.h +OLD_FILES+=usr/include/clang/3.0/wmmintrin.h +OLD_FILES+=usr/include/clang/3.0/x86intrin.h +OLD_FILES+=usr/include/clang/3.0/xmmintrin.h +OLD_DIRS+=usr/include/clang/3.0 # 20120322: Update heimdal to 1.5.1. OLD_FILES+=usr/include/krb5-v4compat.h \ usr/include/krb_err.h \ diff --git a/UPDATING b/UPDATING index b98b7d0389c2..89021c5015bb 100644 --- a/UPDATING +++ b/UPDATING @@ -19,8 +19,19 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: includes various WITNESS- related kernel options, INVARIANTS, malloc debugging flags in userland, and various verbose features in the kernel. Many developers choose to disable these features on build - machines to maximize performance. (To disable malloc debugging, run - ln -s aj /etc/malloc.conf.) + machines to maximize performance. (To completely disable malloc + debugging, define MALLOC_PRODUCTION in /etc/make.conf, or to merely + disable the most expensive debugging functionality run + "ln -s 'abort:false,junk:false' /etc/malloc.conf".) + +20120417: + The malloc(3) implementation embedded in libc now uses sources imported + as contrib/jemalloc. The most disruptive API change is to + /etc/malloc.conf. If your system has an old-style /etc/malloc.conf, + delete it prior to installworld, and optionally re-create it using the + new format after rebooting. See malloc.conf(5) for details + (specifically the TUNING section and the "opt.*" entries in the MALLCTL + NAMESPACE section). 20120328: Big-endian MIPS TARGET_ARCH values no longer end in "eb". 
mips64eb diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 index cea41bcb5821..ef3900eb9f15 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool.8 +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool.8 @@ -1779,7 +1779,7 @@ The following command creates a storage pool consisting of two, two-way mirrors and mirrored log devices: .Bd -literal -offset 2n -.Li # Ic zpool create pool mirror da0 da1 mirror da2 da3 log miror da4 da5 +.Li # Ic zpool create pool mirror da0 da1 mirror da2 da3 log mirror da4 da5 .Ed .It Sy Example 14 No Adding Cache Devices to a Tn ZFS No Pool .Pp diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c index 664a1225b311..caeeaa6b5b76 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_proc.c @@ -811,7 +811,7 @@ dt_proc_destroy(dtrace_hdl_t *dtp, struct ps_prochandle *P) #if defined(sun) (void) _lwp_kill(dpr->dpr_tid, SIGCANCEL); #else - pthread_kill(dpr->dpr_tid, SIGUSR1); + pthread_kill(dpr->dpr_tid, SIGTHR); #endif /* diff --git a/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.html b/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.html deleted file mode 100644 index c4deae43a9cf..000000000000 --- a/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.html +++ /dev/null @@ -1,368 +0,0 @@ -

Introduction

BIND 9.8.1 is the current production release of BIND 9.8.


This document summarizes changes from BIND 9.8.0 to BIND 9.8.1. Please see the CHANGES file in the source code release for a complete list of all changes.

Download

The latest versions of BIND 9 software can always be found on our web site at http://www.isc.org/downloads/all. There you will find additional information about each release, source code, and some pre-compiled versions for certain operating systems.

Support

Product support information is available on http://www.isc.org/services/support for paid support options. Free support is provided by our user community via a mailing list. Information on all public email lists is available at https://lists.isc.org/mailman/listinfo.

New Features

9.8.1

  • Added a new include file with function typedefs for the DLZ "dlopen" driver. [RT #23629]
  • Added a tool able to generate malformed packets to allow testing of how named handles them. [RT #24096]
  • The root key is now provided in the file bind.keys allowing DNSSEC validation to be switched on at start up by adding "dnssec-validation auto;" to named.conf. If the root key provided has expired, named will log the expiration and validation will not work. More information and the most current copy of bind.keys can be found at http://www.isc.org/bind-keys. *Please note this feature was actually added in 9.8.0 but was not included in the 9.8.0 release notes. [RT #21727]
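As a minimal illustrative sketch (assuming a stock BIND 9.8.x layout where bind.keys is installed alongside named.conf, which the release note above implies but does not show), switching this validation on amounts to adding one line to the options statement:

    options {
        // use the root trust anchor shipped in bind.keys
        dnssec-validation auto;
    };
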
Security Fixes

9.8.1

  • If named is configured with a response policy zone (RPZ) and a query of type RRSIG is received for a name configured for RRset replacement in that RPZ, it will trigger an INSIST and crash the server. [RT #24280]
  • named, set up to be a caching resolver, is vulnerable to a user querying a domain with very large resource record sets (RRSets) when trying to negatively cache the response. Due to an off-by-one error, caching the response could cause named to crash. [RT #24650] [CVE-2011-1910]
  • Using Response Policy Zone (RPZ) to query a wildcard CNAME label with query type SIG/RRSIG could cause named to crash. The fix is query-type independent. [RT #24715]
  • Using Response Policy Zone (RPZ) with DNAME records and querying the subdomain of that label can cause named to crash. named now logs that DNAME is not supported. [RT #24766]
  • Change #2912 populated the message section in replies to UPDATE requests, which some Windows clients wanted. This exposed a latent bug that allowed the response message to crash named. With this fix, change 2912 has been reduced to copy only the zone section to the reply. A more complete fix for the latent bug will be released later. [RT #24777]
Feature Changes

9.8.1

  • Merged in the NetBSD ATF test framework (currently version 0.12) for development of future unit tests. Use configure --with-atf to build ATF internally or configure --with-atf=prefix to use an external copy. [RT #23209]
  • Added more verbose error reporting from DLZ LDAP. [RT #23402]
  • The DLZ "dlopen" driver is now built by default, no longer requiring a configure option. To disable it, use "configure --without-dlopen". (Note: driver not supported on win32.) [RT #23467]
  • Replaced compile time constant with STDTIME_ON_32BITS. [RT #23587]
  • Make --with-gssapi default for ./configure. [RT #23738]
  • Improved the startup time for an authoritative server with a large number of zones by making the zone task table of variable size rather than fixed size. This means that authoritative servers with lots of zones will be serving that zone data much sooner. [RT #24406]
  • Per RFC 6303, RFC 1918 reverse zones are now part of the built-in list of empty zones. [RT #24990]
Bug Fixes

9.8.1

  • -During RFC5011 processing some journal write errors were not detected. -This could lead to managed-keys changes being committed but not -recorded in the journal files, causing potential inconsistencies -during later processing. [RT #20256] -
  • A potential NULL pointer dereference in the DNS64 code could cause named to terminate unexpectedly. [RT #20256]
  • A state variable relating to DNSSEC could fail to be set during some infrequently-executed code paths, allowing it to be used whilst in an uninitialized state during cache updates, with unpredictable results. [RT #20256]
  • A potential NULL pointer dereference in DNSSEC signing code could cause named to terminate unexpectedly. [RT #20256]
  • -Several cosmetic code changes were made to silence warnings -generated by a static code analysis tool. [RT #20256] -
  • -When using the -x (sign with only KSK) option on dnssec-signzone, -it could incorrectly count the number of ZSKs in the zone. (And in 9.9.0, -some code cleanup and improved warning messages). [RT #20852] -
  • -When using _builtin in named.conf, named.conf changes were not found -when reloading the config file. Now checks _builtin zone arguments -to see if the zone is re-usable or not. [RT #21914] -
  • -Running dnssec-settime -f on an old-style key will -now force the key to be rewritten to the new key format even if no -other change has been specified, using "-P now -A now" -as default values. [RT #22474] -
  • -After an external code review, a code cleanup was done. [RT #22521] -
  • Cause named to terminate at startup, or rndc reconfig reload to fail, if a log file specified in the conf file isn't a plain file. [RT #22771]
  • -named now forces the ADB cache time for glue related data to zero -instead of relying on TTL. This corrects problematic behavior in cases -where a server was authoritative for the A record of a nameserver for a -delegated zone and was queried to recursively resolve records within -that zone. [RT #22842] -
  • -When a validating resolver got a NODATA response for DNSKEY, it was -not caching the NODATA. Fixed and test added. [RT #22908] -
  • Fixed a bug in which, for zone keys that were published but not immediately activated, automatic signing could fail to trigger. [RT #22911]
  • -Fixed precedence order bug with NS and DNAME records if both are present. -(Also fixed timing of autosign test in 9.7+) [RT #23035] -
  • -When a DNSSEC signed dynamic zone's signatures need to be refreshed, -named would first delete the old signatures in the zone. If a private -key of the same algorithm isn't available to named, the signing would -fail but the old signatures would already be deleted. named now checks -if it can access the private key before deleting the old signatures and -leaves the old signature if no private key is found. [RT #23136] -
  • -When using "auto-dnssec maintain" and rolling to a new key, a -private-type record (only used internally by named) could be created -and not marked as complete. [RT #23253] -
  • -Fixed last autosign test report. [RT #23256] -
  • named didn't save the gid at startup and later assumed gid 0. named now saves/restores the gid when creating named.pid at startup. [RT #23290]
  • -If the server has an IPv6 address but does not have IPv6 connectivity -to the internet, dig +trace could fail attempting to use IPv6 -addresses. [RT #23297] -
  • -If named is configured with managed zones, the managed key maint timer -can exercise a race condition that can crash the server. -[RT #23303] -
  • -Changing TTL did not cause dnssec-signzone to generate new signatures. -[RT #23330] -
  • -Have the validating resolver use RRSIG original TTL to compute -validated RRset and RRSIG TTL. [RT #23332] -
  • -In "make test" bin/tests/resolver, hold the socket manager lock -while freeing the socket. -[RT #23333] -
  • -If named encountered a CNAME instead of a DS record when walking -the chain of trust down from the trust anchor, it incorrectly stopped -validating. [RT #23338] -
  • dns/view.h needed dns/rpz.h but it wasn't in the Makefile.in HEADERS variable. [RT #23342]
  • -RRSIG records could have time stamps too far in the future. -[RT #23356] -
  • named stores cached data in an in-memory database and keeps track of how recently the data is used with a heap. The heap is stored within the cache's memory space. Under a sustained high query load and with a small cache size, this could lead to the heap exhausting the cache space. This would result in cache misses and SERVFAILs, with named never releasing the cache memory the heap used up and never recovering. This fix moves the heap into its own memory space, preventing the heap from exhausting the cache space and allowing named to recover gracefully when the high query load abates. [RT #23371]
  • -Fully separated key management on a per view basis. [RT #23419] -
  • -If running on a powerpc CPU and with atomic operations enabled, -named could lock up. Added sync instructions to the end of atomic -operations. [RT #23469] -
  • -If OpenSSL was built without engine support, named would have -compile errors and fail to build. -[RT #23473] -
  • -If ./configure finds GOST but not elliptic curve, named fails to -build. Added elliptic curve support check in GOST OpenSSL engine -detection. [RT #23485] -
  • -"rndc secroots" would abort on the first error -and so could miss remaining views. [RT #23488] -
  • -Handle isc_event_allocate failures in t_tasks test. -[RT #23572] -
  • ixfr-from-differences {master|slave}; failed to select the master/slave zones, resulting in no diff/journal file being created. [RT #23580]
  • -If a DNAME substitution failed, named returned NOERROR. The correct -response should be YXDOMAIN. -[RT #23591] -
  • dns_dnssec_findzonekeys{2} used an inconsistent timestamp when determining which keys are active. This could result in some RRsets not being signed/re-signed. [RT #23642]
  • -Remove bin/tests/system/logfileconfig/ns1/named.conf and -add setup.sh in order to resolve changing named.conf issue. [RT #23687] -
  • NOTIFY messages were not being sent when generating an NSEC3 chain incrementally. [RT #23702]
  • -DDNS updates using SIG(0) with update-policy match -type "external" could cause a crash. Also fixed nsupdate core -dump on shutdown when using a SIG(0) key, due to the key -not being freed. [RT #23735] -
  • -Zones using automatic key maintenance could fail to check the key -repository for updates. named now checks once per hour and the -automatic check bug has been fixed. [RT #23744] -
  • -named now uses the correct strtok/strtok_r/strtok_s based on OS. -[RT #23747] -
  • -Signatures for records at the zone apex could go -stale due to an incorrect timer setting. [RT #23769] -
  • -The autosign tests attempted to open ports within reserved ranges. Test -now avoids those ports. -[RT #23957] -
  • The GSS TSIG test was failing, since log_cred() caused KRB5_KTNAME to be cached. KRB5_KTNAME is now set before calling log_cred() in dst_gssapi_acceptctx(). [RT #24004]
  • -named, acting as authoritative server for DLZ zones, was not correctly -setting the authoritative (AA) bit. -[RT #24146] -
  • -Clean up some cross-compiling issues and added two undocumented -configure options, --with-gost and --with-rlimtype, to allow over-riding -default settings (gost=no and rlimtype="long int") when cross-compiling. -[RT #24367] -
  • When trying to sign with NSEC3, if dnssec-signzone couldn't find the KSK, it would give an incorrect error "NSEC3 iterations too big for weakest DNSKEY strength" rather than the correct "failed to find keys at the zone apex: not found". [RT #24369]
  • -Configuring 'dnssec-validation auto' in a view instead of in the -options statement could trigger an assertion failure in named-checkconf. -[RT #24382] -
  • -Improved consistency checks for dnssec-enable and -dnssec-validation, added test cases to the -checkconf system test. [RT #24398] -
  • If named is configured to be both authoritative and recursive and receives a recursive query for a CNAME in a zone that it is authoritative for, and that CNAME also points to a zone the server is authoritative for, the recursive part of named will not follow the CNAME chain and the response will not be a complete CNAME chain. [RT #24455]
  • -nsupdate could dump core on shutdown when using SIG(0) keys. [RT #24604] -
  • named could fail to validate zones listed in a DLV that validated insecure without using DLV and had DS records in the parent zone. [RT #24631]
  • -dnssec-signzone now records timestamps just before and just after signing, improving the accuracy of signing statistics. [RT #16030] -
  • -If allow-new-zones was set to yes and name-based ACLs were used, named could crash when "rndc reconfig" was issued. [RT #22739] -
  • -RT #23136 fixed a problem where named would delete old signatures even -when the private key wasn't available to re-sign the zone, resulting in -a zone with missing signatures. This fix (CHANGES 3114) did not -completely fix all issues. [RT #24577] -
  • -A bug in FreeBSD kernels causes IPv6 UDP responses greater than -1280 bytes to not fragment as they should. Until there is a kernel -fix, named will work around this by setting IPV6_USE_MIN_MTU on a -per packet basis. [RT #24950] -
Known issues in this release

  • None.

Thank You

Thank you to everyone who assisted us in making this release possible. If you would like to contribute to ISC to assist us in continuing to make quality open source software, please visit our donations page at http://www.isc.org/supportisc.

diff --git a/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.pdf b/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.pdf deleted file mode 100644 index b2b5de5df5bd..000000000000 Binary files a/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.pdf and /dev/null differ diff --git a/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.txt b/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.txt deleted file mode 100644 index 3fdb9b0ac6e7..000000000000 --- a/contrib/bind9/RELEASE-NOTES-BIND-9.8.1.txt +++ /dev/null @@ -1,268 +0,0 @@ - __________________________________________________________________ - -Introduction - - BIND 9.8.1 is the current production release of BIND 9.8. - - This document summarizes changes from BIND 9.8.0 to BIND 9.8.1. Please - see the CHANGES file in the source code release for a complete list of - all changes. - -Download - - The latest versions of BIND 9 software can always be found on our web - site at http://www.isc.org/downloads/all. There you will find - additional information about each release, source code, and some - pre-compiled versions for certain operating systems. - -Support - - Product support information is available on - http://www.isc.org/services/support for paid support options. Free - support is provided by our user community via a mailing list. - Information on all public email lists is available at - https://lists.isc.org/mailman/listinfo. - -New Features - -9.8.1 - - * Added a new include file with function typedefs for the DLZ - "dlopen" driver. [RT #23629] - * Added a tool able to generate malformed packets to allow testing of - how named handles them. [RT #24096] - * The root key is now provided in the file bind.keys allowing DNSSEC - validation to be switched on at start up by adding - "dnssec-validation auto;" to named.conf. If the root key provided - has expired, named will log the expiration and validation will not - work. More information and the most current copy of bind.keys can - be found at http://www.isc.org/bind-keys. *Please note this feature - was actually added in 9.8.0 but was not included in the 9.8.0 - release notes. [RT #21727] - -Security Fixes - -9.8.1 - - * If named is configured with a response policy zone (RPZ) and a - query of type RRSIG is received for a name configured for RRset - replacement in that RPZ, it will trigger an INSIST and crash the - server. RRSIG. [RT #24280] - * named, set up to be a caching resolver, is vulnerable to a user - querying a domain with very large resource record sets (RRSets) - when trying to negatively cache the response. Due to an off-by-one - error, caching the response could cause named to crash. [RT #24650] - [CVE-2011-1910] - * Using Response Policy Zone (RPZ) to query a wildcard CNAME label - with QUERY type SIG/RRSIG, it can cause named to crash. Fix is - query type independant. [RT #24715] - * Using Response Policy Zone (RPZ) with DNAME records and querying - the subdomain of that label can cause named to crash. Now logs that - DNAME is not supported. [RT #24766] - * Change #2912 populated the message section in replies to UPDATE - requests, which some Windows clients wanted. This exposed a latent - bug that allowed the response message to crash named. With this - fix, change 2912 has been reduced to copy only the zone section to - the reply. A more complete fix for the latent bug will be released - later. [RT #24777] - -Feature Changes - -9.8.1 - - * Merged in the NetBSD ATF test framework (currently version 0.12) - for development of future unit tests. 
Use configure --with-atf to - build ATF internally or configure --with-atf=prefix to use an - external copy. [RT #23209] - * Added more verbose error reporting from DLZ LDAP. [RT #23402] - * The DLZ "dlopen" driver is now built by default, no longer - requiring a configure option. To disable it, use "configure - --without-dlopen". (Note: driver not supported on win32.) [RT - #23467] - * Replaced compile time constant with STDTIME_ON_32BITS. [RT #23587] - * Make --with-gssapi default for ./configure. [RT #23738] - * Improved the startup time for an authoritative server with a large - number of zones by making the zone task table of variable size - rather than fixed size. This means that authoritative servers with - lots of zones will be serving that zone data much sooner. [RT - #24406] - * Per RFC 6303, RFC 1918 reverse zones are now part of the built-in - list of empty zones. [RT #24990] - -Bug Fixes - -9.8.1 - - * During RFC5011 processing some journal write errors were not - detected. This could lead to managed-keys changes being committed - but not recorded in the journal files, causing potential - inconsistencies during later processing. [RT #20256] - * A potential NULL pointer deference in the DNS64 code could cause - named to terminate unexpectedly. [RT #20256] - * A state variable relating to DNSSEC could fail to be set during - some infrequently-executed code paths, allowing it to be used - whilst in an unitialized state during cache updates, with - unpredictable results. [RT #20256] - * A potential NULL pointer deference in DNSSEC signing code could - cause named to terminate unexpectedly [RT #20256] - * Several cosmetic code changes were made to silence warnings - generated by a static code analysis tool. [RT #20256] - * When using the -x (sign with only KSK) option on dnssec-signzone, - it could incorrectly count the number of ZSKs in the zone. (And in - 9.9.0, some code cleanup and improved warning messages). [RT - #20852] - * When using _builtin in named.conf, named.conf changes were not - found when reloading the config file. Now checks _builtin zone - arguments to see if the zone is re-usable or not. [RT #21914] - * Running dnssec-settime -f on an old-style key will now force the - key to be rewritten to the new key format even if no other change - has been specified, using "-P now -A now" as default values. [RT - #22474] - * After an external code review, a code cleanup was done. [RT #22521] - * Cause named to terminate at startup or rndc reconfig reload to - fail, if a log file specified in the conf file isn't a plain file. - (RT #22771] - * named now forces the ADB cache time for glue related data to zero - instead of relying on TTL. This corrects problematic behavior in - cases where a server was authoritative for the A record of a - nameserver for a delegated zone and was queried to recursively - resolve records within that zone. [RT #22842] - * When a validating resolver got a NODATA response for DNSKEY, it was - not caching the NODATA. Fixed and test added. [RT #22908] - * Fixed a bug in which zone keys that were published and but not - immediately activated, automatic signing could fail to trigger. [RT - #22911] - * Fixed precedence order bug with NS and DNAME records if both are - present. (Also fixed timing of autosign test in 9.7+) [RT #23035] - * When a DNSSEC signed dynamic zone's signatures need to be - refreshed, named would first delete the old signatures in the zone. 
- If a private key of the same algorithm isn't available to named, - the signing would fail but the old signatures would already be - deleted. named now checks if it can access the private key before - deleting the old signatures and leaves the old signature if no - private key is found. [RT #23136] - * When using "auto-dnssec maintain" and rolling to a new key, a - private-type record (only used internally by named) could be - created and not marked as complete. [RT #23253] - * Fixed last autosign test report. [RT #23256] - * named didn't save gid at startup and later assumed gid 0. named now - saves/restores the gid when creating creating named.pid at startup. - [RT #23290] - * If the server has an IPv6 address but does not have IPv6 - connectivity to the internet, dig +trace could fail attempting to - use IPv6 addresses. [RT #23297] - * If named is configured with managed zones, the managed key maint - timer can exercise a race condition that can crash the server. [RT - #23303] - * Changing TTL did not cause dnssec-signzone to generate new - signatures. [RT #23330] - * Have the validating resolver use RRSIG original TTL to compute - validated RRset and RRSIG TTL. [RT #23332] - * In "make test" bin/tests/resolver, hold the socket manager lock - while freeing the socket. [RT #23333] - * If named encountered a CNAME instead of a DS record when walking - the chain of trust down from the trust anchor, it incorrectly - stopped validating. [RT #23338] - * dns/view.h needed dns/rpz.h but it wasn't in the Makfile.in HEADERS - variable. [RT #23342] - * RRSIG records could have time stamps too far in the future. [RT - #23356] - * named stores cached data in an in-memory database and keeps track - of how recently the data is used with a heap. The heap is stored - within the cache's memory space. Under a sustained high query load - and with a small cache size, this could lead to the heap exhausting - the cache space. This would result in cache misses and SERVFAILs, - with named never releasing the cache memory the heap used up and - never recovering. This fix removes the heap into its own memory - space, preventing the heap from exhausting the cache space and - allowing named to recover gracefully when the high query load - abates. [RT #23371] - * Fully separated key management on a per view basis. [RT #23419] - * If running on a powerpc CPU and with atomic operations enabled, - named could lock up. Added sync instructions to the end of atomic - operations. [RT #23469] - * If OpenSSL was built without engine support, named would have - compile errors and fail to build. [RT #23473] - * If ./configure finds GOST but not elliptic curve, named fails to - build. Added elliptic curve support check in GOST OpenSSL engine - detection. [RT #23485] - * "rndc secroots" would abort on the first error and so could miss - remaining views. [RT #23488] - * Handle isc_event_allocate failures in t_tasks test. [RT #23572] - * ixfr-from-differences {master|slave}; failed to select the - master/slave zones, resulting in on diff/journal file being - created. [RT #23580] - * If a DNAME substitution failed, named returned NOERROR. The correct - response should be YXDOMAIN. [RT #23591] - * dns_dnssec_findzonekeys{2} used a inconsistant timestamp when - determining which keys are active. This could result in some RRsets - not being signed/re-signed. [RT #23642] - * Remove bin/tests/system/logfileconfig/ns1/named.conf and add - setup.sh in order to resolve changing named.conf issue. 
[RT #23687] - * NOTIFY messages were not being sent when generating a NSEC3 chain - incrementally. [RT #23702] - * DDNS updates using SIG(0) with update-policy match type "external" - could cause a crash. Also fixed nsupdate core dump on shutdown when - using a SIG(0) key, due to the key not being freed. [RT #23735] - * Zones using automatic key maintenance could fail to check the key - repository for updates. named now checks once per hour and the - automatic check bug has been fixed. [RT #23744] - * named now uses the correct strtok/strtok_r/strtok_s based on OS. - [RT #23747] - * Signatures for records at the zone apex could go stale due to an - incorrect timer setting. [RT #23769] - * The autosign tests attempted to open ports within reserved ranges. - Test now avoids those ports. [RT #23957] - * GSS TGIS test was failing, since log_cred() caused KRB5_KTNAME to - be cached. Now sets KRB5_KTNAME before calling log_cred() in - dst_gssapi_acceptctx(). [RT #24004] - * named, acting as authoritative server for DLZ zones, was not - correctly setting the authoritative (AA) bit. [RT #24146] - * Clean up some cross-compiling issues and added two undocumented - configure options, --with-gost and --with-rlimtype, to allow - over-riding default settings (gost=no and rlimtype="long int") when - cross-compiling. [RT #24367] - * When trying sign with NSEC3, if dnssec-signzone couldn't find the - KSK, it would give an incorrect error "NSEC3 iterations too big for - weakest DNSKEY strength" rather than the correct "failed to find - keys at the zone apex: not found" [RT #24369] - * Configuring 'dnssec-validation auto' in a view instead of in the - options statement could trigger an assertion failure in - named-checkconf. [RT #24382] - * Improved consistency checks for dnssec-enable and - dnssec-validation, added test cases to the checkconf system test. - [RT #24398] - * If named is configured to be both authoritative and recursive and - receives a recursive query for a CNAME in a zone that it is - authoritative for, if that CNAME also points to a zone the server - is authoritative for, the recursive part of name will not follow - the CNAME change and the response will not be a complete CNAME - chain. [RT #24455] - * nsupdate could dump core on shutdown when using SIG(0) keys. [RT - #24604] - * Named could fail to validate zones list in a DLV that validated - insecure without using DLV and had DS records in the parent zone. - [RT #24631] - * dnssec-signzone now records timestamps just before and just after - signing, improving the accuracy of signing statistics. [RT #16030] - * If allow-new-zones was set to yes and name-based ACLs were used, - named could crash when "rndc reconfig" was issued. [RT #22739] - * RT #23136 fixed a problem where named would delete old signatures - even when the private key wasn't available to re-sign the zone, - resulting in a zone with missing signatures. This fix (CHANGES - 3114) did not completely fix all issues. [RT #24577] - * A bug in FreeBSD kernels causes IPv6 UDP responses greater than - 1280 bytes to not fragment as they should. Until there is a kernel - fix, named will work around this by setting IPV6_USE_MIN_MTU on a - per packet basis. [RT #24950] - -Known issues in this release - - * None. - -Thank You - - Thank you to everyone who assisted us in making this release possible. - If you would like to contribute to ISC to assist us in continuing to - make quality open source software, please visit our donations page at - http://www.isc.org/supportisc. 
diff --git a/contrib/bind9/release-notes.css b/contrib/bind9/release-notes.css deleted file mode 100644 index 822214c1d610..000000000000 --- a/contrib/bind9/release-notes.css +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (C) 2010, 2011 Internet Systems Consortium, Inc. ("ISC") - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, - * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -/* $Id$ */ - -body { - background-color: #ffffff; - color: #333333; - font-family: "Helvetica Neue", "ArialMT", "Verdana", "Arial", "Helvetica", sans-serif; - font-size: 14px; - line-height: 18px; - margin: 2em auto; - width: 700px; -} - -.command { - font-family: "Courier New", "Courier", monospace; - font-weight: normal; -} - -.note { - background-color: #ddeedd; - border: 1px solid #aaccaa; - margin: 1em 0 1em 0; - padding: 0.5em 1em 0.5em 1em; - -moz-border-radius: 10px; - -webkit-border-radius: 10px; -} - -.screen { - background-color: #ffffee; - border: 1px solid #ddddaa; - padding: 0.25em 1em 0.25em 1em; - margin: 1em 0 1em 0; - -moz-border-radius: 10px; - -webkit-border-radius: 10px; -} - -.section.title { - font-size: 150%; - font-weight: bold; -} - -.section.section.title { - font-size: 130%; - font-weight: bold; -} diff --git a/contrib/file/ChangeLog b/contrib/file/ChangeLog index 90a32fcecffb..4c19cc6034ba 100644 --- a/contrib/file/ChangeLog +++ b/contrib/file/ChangeLog @@ -1,3 +1,309 @@ +2012-02-20 17:33 Christos Zoulas + + * Fix CDF parsing issues found by CERT's fuzzing tool (Will Dormann) + +2011-12-15 12:17 Chris Metcalf + + * Support Tilera architectures (tile64, tilepro, tilegx). + +2011-12-16 16:33 Reuben Thomas + + * Add magic for /usr/bin/env Perl scripts + * Weaken generic script magic to avoid clashing with + language-specific magic. + +2011-12-08 13:37 Reuben Thomas + + * Simplify if (p) free(p) to free(p). + +2011-12-08 13:07 Reuben Thomas + + * Remove hardwired token finding (names.h), turning it into soft + magic. Patterns are either anchored regexs or search/8192. English + language detection and PL/1 detection have been removed as they + were too fragile. -e tokens is still accepted for backwards + compatibility. + * Move 3ds patterns (which are commented out anyway) into autodesk + (they were, oddly, in c-lang). + +2011-12-06 00:16 Reuben Thomas + + * Tweak strength of generic hash-bang detectors to be less than + specific ones. + * Make an inconsistent description of Python scripts consistent. + +2011-12-05 23:58 Reuben Thomas + + * Fix minor error in file(1). + +2011-11-05 00:00 Reuben Thomas + + * Fix issue #150 (I hope). + +2011-09-22 12:57 Christos Zoulas + + * Python3 binding fixes from Kelly Anderson + +2011-09-20 11:32 Christos Zoulas + + * If a string type magic entry is marked as text or binary + only match text files against text entries and binary + files against binary entries. 
+ +2011-09-01 12:12 Christos Zoulas + + * Don't wait for any subprocess, just the one we forked. + +2011-08-26 16:40 Christos Zoulas + + * If the application name is not set in a cdf file, try to see + if it has a directory with the application name on it. + +2011-08-17 14:32 Christos Zoulas + + * Fix ELF lseek(2) madness. Inspired by PR/134 by Jan Kaluza + +2011-08-14 09:03 Christos Zoulas + + * Don't use variable string formats. + +2011-07-12 12:32 Reuben Thomas + + * Fix detection of Zip files (Mantis #128). + * Make some minor improvements to file(1). + * Rename MIME types for filesystem objects for consistency with + xdg-utils. Typically this means that application/x-foo becomes + inode/foo, but some names also change slightly, e.g. + application/x-character-device becomes inode/chardevice. + +2011-05-10 20:57 Christos Zoulas + + * fix mingw compilation (Abradoks) + +2011-05-10 20:57 Christos Zoulas + + * remove patchlevel.h + * Fix read past allocated memory caused by double-incrementing + a pointer in a loop (reported by Roberto Maar) + +2011-03-30 15:45 Christos Zoulas + + * Fix cdf string buffer setting (Sven Anders) + +2011-03-20 16:35 Christos Zoulas + + * Eliminate MAXPATHLEN and use dynamic allocation for + path and file buffers. + +2011-03-15 18:15 Christos Zoulas + + * binary tests on magic entries with masks could spuriously + get converted to ascii. + +2011-03-12 18:06 Reuben Thomas + + * Improve file.man (remove BUGS, present email addresses consistently). + +2011-03-07 19:38 Christos Zoulas + + * add lrzip support (from Ville Skytta) + +2011-02-10 16:36 Christos Zoulas + + * fix CDF bounds checking (Guy Helmer) + +2011-02-10 12:03 Christos Zoulas + + * add cdf_ctime() that prints a meaningful error when time cannot + be converted. + +2011-02-02 20:40 Christos Zoulas + + * help and version output to stdout. + + * When matching softmagic for ascii files, don't just print + the softmagic classification, keep going and print the + text classification too. This fixes broken troff files when + we moved them from keyword recognition to softmagic + (they stopped printing "with CRLF" etc.) + Reported by Doug McIlroy. + +2011-01-16 19:31 Reuben Thomas + + * Fix two potential buffer overruns in apprentice_list. + +2011-01-14 22:33 Reuben Thomas + + * New Python binding in pure Python. + * Update libmagic(3). + +2011-01-06 21:40 Reuben Thomas + + * Fix Python bindings (including recent Python 3 compatibility + update). + +2011-01-04 18:43 Reuben Thomas + + * magic/Makefile.am: make it easier to recover from magic build failures. + * Fix pstring length specifier parsing to avoid generating invalid + magic files. + * Add pstring length "J" (for "JPEG") to specify that the length + include itself. + * Fix JPEG comment parsing at last using pstring/HJ! + * Ignore section 5 man pages in doc/.cvsignore. + +2010-12-22 13:12 Christos Zoulas + + * Add pstring/BHhLl to specify the type of the length of pascal + strings. + +2010-11-26 18:39 Reuben Thomas + + * Fix "-e soft": it was ignored when softmagic was called + during asciimagic. + * Improve comments and use "unsigned char" in tar.h/is_tar.c. + +2010-11-05 17:26 Reuben Thomas + + * Make bug reporting addresses more visible. 
+ +2010-11-01 18:35 Reuben Thomas + + * Add tcl magic from Gustaf Neumann + +2010-10-24 10:42 Christos Zoulas + + * Fix the whitespace comparing code (Christopher Chittleborough) + +2010-10-06 21:05 Christos Zoulas + + * allow string/t to work (Jan Kaluza) + +2010-09-20 22:11 Reuben Thomas + + * Apply some patches from Ubuntu and Fedora. + +2010-09-20 21:16 Reuben Thomas + + * Apply all patches from Debian package 5.04-6 which have not + already been applied and are not Debian-specific. + +2010-09-20 15:24 Reuben Thomas + + * Minor security fix to softmagic.c (don't use untrusted + string as printf format). + +2010-07-21 12:20 Christos Zoulas + + * MINGW32 portability from LRN + + * Don't warn about escaping magic regex chars when we are in a regex. + +2010-07-19 10:55 Christos Zoulas + + * Only try to print prpsinfo for core files. (Jan Kaluza) + +2010-04-22 12:55 Christos Zoulas + + * Try more elf offsets for Debian core files. (Arnaud Giersch) + +2010-02-20 15:18 Reuben Thomas + + * Clarify which sort of CDF we mean. + +2010-02-14 22:58 Reuben Thomas + + * Re-jig Zip file type magic so that unsupported special + Zip types (those with "mimetype" at offset 30) can be + recognized. + +2010-02-02 21:50 Reuben Thomas + + * Add support for OCF (EPUB) files (application/epub+zip) + +2010-01-28 18:25 Christos Zoulas + + * Fix core-dump from unbound loop: + https://bugzilla.redhat.com/show_bug.cgi?id=533245 + +2010-01-22 15:45 Christos Zoulas + + * print proper mime for crystal reports file + + * print the last summary information of a cdf document, not the + first so that nested documents print the right info + +2010-01-16 18:42 Charles Longeau + + * bring back some fixes from OpenBSD: + - make gcc2 builds file + - fix typos in a magic file comment + +2009-11-17 18:35 Christos Zoulas + + * ctime/asctime can return NULL on some OS's although + they should not (Toshit Antani) + +2009-09-14 13:49 Christos Zoulas + + * Centralize magic path handling routines and remove the + special-casing from file.c so that the python module for + example comes up with the same magic path (Fixes ~/.magic + handling) (from Gab) + +2009-09-11 23:38 Reuben Thomas + + * When magic argument is a directory, read the files in + strcmp-sorted order (fixes Debian bug #488562 and our own FIXME). + +2009-09-11 13:11 Reuben Thomas + + * Combine overlapping epoc and psion magic files into one (epoc). + + * Add some more EPOC MIME types. + +2009-08-19 15:55 Christos Zoulas + + * Fix 3 bugs (From Ian Darwin): + - file_showstr could move one past the end of the array + - parse_apple did not nul terminate the string in the overflow case + - parse_mime truncated the wrong string in the overflow case + +2009-08-12 12:28 Robert Byrnes + + * Include Localstuff when compiling magic. + +2009-07-15 10:05 Christos Zoulas + + * Fix logic for including mygetopts.h + + * Make cdf.c compile again with debugging + + * Add the necessary field handling for crystal reports files to work + +2009-06-23 01:34 Reuben Thomas + + * Stop "(if" identifying Lisp files, that's plain dumb! + +2009-06-09 22:13 Reuben Thomas + + * Add a couple of missing MP3 MIME types. + +2009-05-27 23:00 Reuben Thomas + + * Add full range of hash-bang tests for Python and Ruby. + + * Add MIME types for Python and Ruby scripts. 
+ +2009-05-13 10:44 Christos Zoulas + + * off by one in parsing hw capabilities in elf + (Cheng Renquan) + +2009-05-08 13:40 Christos Zoulas + + * lint fixes and more from NetBSD + 2009-05-06 10:25 Christos Zoulas * Avoid null dereference in cdf code (Drew Yao) diff --git a/contrib/file/Header b/contrib/file/Header index 3ca9b0eb2cc7..831122e27a80 100644 --- a/contrib/file/Header +++ b/contrib/file/Header @@ -1,5 +1,5 @@ -# Magic # Magic data for file(1) command. -# Machine-generated from src/cmd/file/magdir/*; edit there only! # Format is described in magic(files), where: -# files is 5 on V7 and BSD, 4 on SV, and ?? in the SVID. +# files is 5 on V7 and BSD, 4 on SV, and ?? on SVID. +# Don't edit this file, edit /etc/magic or send your magic improvements +# to the maintainers, at file@mx.gw.com diff --git a/contrib/file/INSTALL b/contrib/file/INSTALL index 5458714e1e2c..7d1c323beae7 100644 --- a/contrib/file/INSTALL +++ b/contrib/file/INSTALL @@ -2,18 +2,24 @@ Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006 Free Software Foundation, Inc. +2006, 2007, 2008, 2009 Free Software Foundation, Inc. -This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. Basic Installation ================== -Briefly, the shell commands `./configure; make; make install' should + Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for -instructions specific to this package. +instructions specific to this package. Some packages provide this +`INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses @@ -42,7 +48,7 @@ may remove or edit it. you want to change it or regenerate `configure' using a newer version of `autoconf'. -The simplest way to compile this package is: + The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. @@ -53,12 +59,22 @@ The simplest way to compile this package is: 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with - the package. + the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and - documentation. + documentation. When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the `make install' phase executed with root + privileges. - 5. You can remove the program binaries and object files from the + 5. Optionally, type `make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. 
Running this target as a + regular user, particularly if the prior `make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is @@ -67,12 +83,22 @@ The simplest way to compile this package is: all sorts of other programs in order to regenerate files that came with the distribution. + 7. Often, you can also type `make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide `make + distcheck', which can by used by developers to test that all other + targets like `make install' and `make uninstall' work correctly. + This target is generally not run by end users. + Compilers and Options ===================== -Some systems require unusual options for compilation or linking that the -`configure' script does not know about. Run `./configure --help' for -details on some of the pertinent environment variables. + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. Run `./configure --help' +for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here @@ -85,25 +111,41 @@ is an example: Compiling For Multiple Architectures ==================================== -You can compile the package for more than one kind of computer at the + You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. +source code in the directory that `configure' is in and in `..'. This +is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple `-arch' options to the +compiler but only a single `-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the `lipo' tool if you have problems. + Installation Names ================== -By default, `make install' installs the package's commands under + By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. 
You can specify an installation prefix other than `/usr/local' by giving -`configure' the option `--prefix=PREFIX'. +`configure' the option `--prefix=PREFIX', where PREFIX must be an +absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you @@ -114,16 +156,47 @@ Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. +you can set and what kinds of files go in them. In general, the +default for these options is expressed in terms of `${prefix}', so that +specifying just `--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to `configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +`make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, `make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +`${prefix}'. Any directories that were specified during `configure', +but not in terms of `${prefix}', must each be overridden at install +time for the entire installation to be relocated. The approach of +makefile variable overrides for each directory variable is required by +the GNU Coding Standards, and ideally causes no recompilation. +However, some platforms have known limitations with the semantics of +shared libraries that end up requiring recompilation when using this +method, particularly noticeable in packages that use GNU Libtool. + + The second method involves providing the `DESTDIR' variable. For +example, `make install DESTDIR=/alternate/directory' will prepend +`/alternate/directory' before all installation names. The approach of +`DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of `${prefix}' +at `configure' time. + +Optional Features +================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. -Optional Features -================= - -Some packages pay attention to `--enable-FEATURE' options to + Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The @@ -135,14 +208,53 @@ find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. + Some packages offer the ability to configure how verbose the +execution of `make' will be. 
For these packages, running `./configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with `make V=1'; while running `./configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with `make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU +CC is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its `' header file. The option `-nodtk' can be used as +a workaround. If GNU CC is not installed, it is therefore recommended +to try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put `/usr/ucb' early in your `PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in `/usr/bin'. So, if you need `/usr/ucb' +in your `PATH', put it _after_ `/usr/bin'. + + On Haiku, software installed for all users goes in `/boot/common', +not `/usr/local'. It is recommended to use the following options: + + ./configure --prefix=/boot/common + Specifying the System Type ========================== -There may be some features `configure' cannot figure out automatically, -but needs to determine by the type of machine the package will run on. -Usually, assuming the package is built to be run on the _same_ -architectures, `configure' can figure that out, but if it prints a -message saying it cannot guess the machine type, give it the + There may be some features `configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, `configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: @@ -150,7 +262,8 @@ type, such as `sun4', or a canonical name which has the form: where SYSTEM can have one of these forms: - OS KERNEL-OS + OS + KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't @@ -168,9 +281,9 @@ eventually be run) with `--host=TYPE'. Sharing Defaults ================ -If you want to set default values for `configure' scripts to share, you -can create a site shell script called `config.site' that gives default -values for variables like `CC', `cache_file', and `prefix'. + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. @@ -179,7 +292,7 @@ A warning: not all `configure' scripts look for a site script. Defining Variables ================== -Variables not defined in a site shell script can be set in the + Variables not defined in a site shell script can be set in the environment passed to `configure'. 
However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set @@ -198,11 +311,19 @@ an Autoconf bug. Until the bug is fixed you can use this workaround: `configure' Invocation ====================== -`configure' recognizes the following options to control how it operates. + `configure' recognizes the following options to control how it +operates. `--help' `-h' - Print a summary of the options to `configure', and exit. + Print a summary of all of the options to `configure', and exit. + +`--help=short' +`--help=recursive' + Print a summary of the options unique to this package's + `configure', and exit. The `short' variant lists options used + only in the top level, while the `recursive' variant lists options + also present in any nested packages. `--version' `-V' @@ -229,6 +350,16 @@ an Autoconf bug. Until the bug is fixed you can use this workaround: Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. +`--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: + for more details, including other options available for fine-tuning + the installation locations. + +`--no-create' +`-n' + Run the configure checks, but stop before creating any output + files. + `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. diff --git a/contrib/file/Magdir/acorn b/contrib/file/Magdir/acorn index 5acb31ee2043..4202be912419 100644 --- a/contrib/file/Magdir/acorn +++ b/contrib/file/Magdir/acorn @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: acorn,v 1.5 2009/09/19 16:28:07 christos Exp $ # acorn: file(1) magic for files found on Acorn systems # diff --git a/contrib/file/Magdir/adi b/contrib/file/Magdir/adi index 12d7da59bcde..2fe79d443119 100644 --- a/contrib/file/Magdir/adi +++ b/contrib/file/Magdir/adi @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: adi,v 1.4 2009/09/19 16:28:07 christos Exp $ # adi: file(1) magic for ADi's objects # From Gregory McGarry # diff --git a/contrib/file/Magdir/adventure b/contrib/file/Magdir/adventure index 7b30c496144e..febc2077f129 100644 --- a/contrib/file/Magdir/adventure +++ b/contrib/file/Magdir/adventure @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: adventure,v 1.13 2010/12/31 16:32:54 christos Exp $ # adventure: file(1) magic for Adventure game files # # from Allen Garvin @@ -16,18 +17,26 @@ # Infocom (see z-machine) #------------------------------------------------------------------------------ # Z-machine: file(1) magic for Z-machine binaries. +# Updated by Adam Buchbinder # -# This will match ${TEX_BASE}/texmf/omega/ocp/char2uni/inbig5.ocp which -# appears to be a version-0 Z-machine binary. +#http://www.gnelson.demon.co.uk/zspec/sect11.html +#http://www.jczorkmid.net/~jpenney/ZSpec11-latest.txt +#http://en.wikipedia.org/wiki/Z-machine +# The first byte is the Z-machine revision; it is always between 1 and 8. We +# had false matches (for instance, inbig5.ocp from the Omega TeX extension as +# well as an occasional MP3 file), so we sanity-check the version number. # -# The (false match) message is to correct that behavior. Perhaps it is -# not needed. 
+# It might be possible to sanity-check the release number as well, as it seems +# (at least in classic Infocom games) to always be a relatively small number, +# always under 150 or so, but as this isn't rigorous, we'll wait on that until +# it becomes clear that it's needed. # -16 belong&0xfe00f0f0 0x3030 Infocom game data ->0 ubyte 0 (false match) ->0 ubyte >0 (Z-machine %d, ->>2 ubeshort x Release %d / ->>18 string >\0 Serial %.6s) +0 ubyte >0 +>0 ubyte <9 +>>16 belong&0xfe00f0f0 0x3030 Infocom game data +>>>0 ubyte x (Z-machine %d, +>>>>2 ubeshort x Release %d / +>>>>18 string >\0 Serial %.6s) #------------------------------------------------------------------------------ # Glulx: file(1) magic for Glulx binaries. @@ -45,10 +54,9 @@ # For Quetzal and blorb magic see iff -# TADS (Text Adventure Development System) +# TADS (Text Adventure Development System) version 2 # All files are machine-independent (games compile to byte-code) and are tagged -# with a version string of the form "V2..\0" (but TADS 3 is -# on the way). +# with a version string of the form "V2..\0". # Game files start with "TADS2 bin\n\r\032\0" then the compiler version. 0 string TADS2\ bin TADS >9 belong !0x0A0D1A00 game data, CORRUPTED @@ -73,6 +81,19 @@ >10 belong 0x0A0D1A00 >>14 string >\0 %s saved game data +# TADS (Text Adventure Development System) version 3 +# Game files start with "T3-image\015\012\032" +0 string T3-image\015\012\032 +>11 leshort x TADS 3 game data (format version %d) +# Saved game files start with "T3-state-v####\015\012\032" +# where #### is a format version number +0 string T3-state-v +>14 string \015\012\032 TADS 3 saved game data (format version +>>10 byte x %c +>>11 byte x \b%c +>>12 byte x \b%c +>>13 byte x \b%c) + # Danny Milosavljevic # this are adrift (adventure game standard) game files, extension .taf # depending on version magic continues with 0x93453E6139FA (V 4.0) diff --git a/contrib/file/Magdir/allegro b/contrib/file/Magdir/allegro index 41f4ad6cd236..b937c9cb0254 100644 --- a/contrib/file/Magdir/allegro +++ b/contrib/file/Magdir/allegro @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: allegro,v 1.4 2009/09/19 16:28:07 christos Exp $ # allegro: file(1) magic for Allegro datafiles # Toby Deshane # diff --git a/contrib/file/Magdir/alliant b/contrib/file/Magdir/alliant index 69cf4b44d742..962020238e17 100644 --- a/contrib/file/Magdir/alliant +++ b/contrib/file/Magdir/alliant @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: alliant,v 1.7 2009/09/19 16:28:07 christos Exp $ # alliant: file(1) magic for Alliant FX series a.out files # # If the FX series is the one that had a processor with a 68K-derived diff --git a/contrib/file/Magdir/alpha b/contrib/file/Magdir/alpha deleted file mode 100644 index c0191fb931e4..000000000000 --- a/contrib/file/Magdir/alpha +++ /dev/null @@ -1,30 +0,0 @@ -#------------------------------------------------------------------------------ -# alpha architecture description -# - -0 leshort 0603 COFF format alpha ->22 leshort&030000 !020000 executable ->24 leshort 0410 pure ->24 leshort 0413 paged ->22 leshort&020000 !0 dynamically linked ->16 lelong !0 not stripped ->16 lelong 0 stripped ->22 leshort&030000 020000 shared library ->24 leshort 0407 object ->27 byte x - version %d ->26 byte x .%d ->28 byte x -%d - -# Basic recognition of Digital UNIX core dumps - Mike Bremford -# -# The actual magic number is just "Core", followed by a 2-byte 
version -# number; however, treating any file that begins with "Core" as a Digital -# UNIX core dump file may produce too many false hits, so we include one -# byte of the version number as well; DU 5.0 appears only to be up to -# version 2. -# -0 string Core\001 Alpha COFF format core dump (Digital UNIX) ->24 string >\0 \b, from '%s' -0 string Core\002 Alpha COFF format core dump (Digital UNIX) ->24 string >\0 \b, from '%s' - diff --git a/contrib/file/Magdir/amanda b/contrib/file/Magdir/amanda index 2b9565083ee0..395ef545c2ba 100644 --- a/contrib/file/Magdir/amanda +++ b/contrib/file/Magdir/amanda @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: amanda,v 1.5 2009/09/19 16:28:07 christos Exp $ # amanda: file(1) magic for amanda file format # 0 string AMANDA:\ AMANDA diff --git a/contrib/file/Magdir/amigaos b/contrib/file/Magdir/amigaos index bb98299d89ff..f4c9cf265317 100644 --- a/contrib/file/Magdir/amigaos +++ b/contrib/file/Magdir/amigaos @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: amigaos,v 1.14 2009/09/19 16:28:07 christos Exp $ # amigaos: file(1) magic for AmigaOS binary formats: # diff --git a/contrib/file/Magdir/animation b/contrib/file/Magdir/animation index 46b23ecd507c..c09a26ac85e3 100644 --- a/contrib/file/Magdir/animation +++ b/contrib/file/Magdir/animation @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: animation,v 1.45 2011/09/06 11:00:06 christos Exp $ # animation: file(1) magic for animation/movie formats # # animation formats @@ -29,7 +30,7 @@ #!:mime image/x-quicktime 4 string pckg Apple QuickTime compressed archive !:mime application/x-quicktime-player -4 string/B jP JPEG 2000 image +4 string/W jP JPEG 2000 image !:mime image/jp2 4 string ftyp ISO Media >8 string isom \b, MPEG v4 system, version 1 @@ -41,10 +42,18 @@ !:mime video/mp4 >8 string mp7t \b, MPEG v4 system, MPEG v7 XML >8 string mp7b \b, MPEG v4 system, MPEG v7 binary XML ->8 string/B jp2 \b, JPEG 2000 +>8 string/W jp2 \b, JPEG 2000 !:mime image/jp2 +>8 string 3ge \b, MPEG v4 system, 3GPP +!:mime video/3gpp +>8 string 3gg \b, MPEG v4 system, 3GPP +!:mime video/3gpp >8 string 3gp \b, MPEG v4 system, 3GPP !:mime video/3gpp +>8 string 3gs \b, MPEG v4 system, 3GPP +!:mime video/3gpp +>8 string 3g2 \b, MPEG v4 system, 3GPP2 +!:mime video/3gpp2 >>11 byte 4 \b v4 (H.263/AMR GSM 6.10) >>11 byte 5 \b v5 (H.263/AMR GSM 6.10) >>11 byte 6 \b v6 (ITU H.264/AMR GSM 6.10) @@ -52,13 +61,13 @@ !:mime video/mp4 >8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC !:mime video/3gpp ->8 string/B M4A \b, MPEG v4 system, iTunes AAC-LC +>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC !:mime audio/mp4 ->8 string/B M4V \b, MPEG v4 system, iTunes AVC-LC +>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC !:mime video/mp4 ->8 string/B M4P \b, MPEG v4 system, iTunes AES encrypted ->8 string/B M4B \b, MPEG v4 system, iTunes bookmarked ->8 string/B qt \b, Apple QuickTime movie +>8 string/W M4P \b, MPEG v4 system, iTunes AES encrypted +>8 string/W M4B \b, MPEG v4 system, iTunes bookmarked +>8 string/W qt \b, Apple QuickTime movie !:mime video/quicktime # MPEG sequences @@ -71,6 +80,7 @@ >>7 byte x \b @ L %u 0 belong&0xFFFFFF00 0x00000100 >3 byte 0xBA MPEG sequence +!:mime video/mpeg >>4 byte &0x40 \b, v2, program multiplex >>4 byte ^0x40 \b, v1, system multiplex >3 byte 0xBB MPEG sequence, v1/2, multiplex (missing pack header) @@ -80,6 +90,7 @@ >>4 byte 88 \b, 
extended >>6 byte x \b @ L %u >3 byte 0xB0 MPEG sequence, v4 +!:mime video/mpeg4-generic >>5 belong 0x000001B5 >>>9 byte &0x80 >>>>10 byte&0xF0 16 \b, video @@ -149,6 +160,7 @@ >>4 byte 252 \b, FGS @ L4 >>4 byte 253 \b, FGS @ L5 >3 byte 0xB5 MPEG sequence, v4 +!:mime video/mpeg4-generic >>4 byte &0x80 >>>5 byte&0xF0 16 \b, video (missing profile header) >>>5 byte&0xF0 32 \b, still texture (missing profile header) @@ -159,6 +171,7 @@ >>4 byte&0xF8 24 \b, mesh (missing profile header) >>4 byte&0xF8 32 \b, face (missing profile header) >3 byte 0xB3 MPEG sequence +!:mime video/mpeg >>12 belong 0x000001B8 \b, v1, progressive Y'CbCr 4:2:0 video >>12 belong 0x000001B2 \b, v1, progressive Y'CbCr 4:2:0 video >>12 belong 0x000001B5 \b, v2, @@ -469,6 +482,7 @@ # MPA, M2A 0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2 +!:mime audio/mpeg # rate >2 byte&0xF0 0x10 \b, 32 kbps >2 byte&0xF0 0x20 \b, 48 kbps @@ -503,6 +517,7 @@ # MP3, M25A 0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5 +!:mime audio/mpeg # rate >2 byte&0xF0 0x10 \b, 8 kbps >2 byte&0xF0 0x20 \b, 16 kbps @@ -697,6 +712,7 @@ # Microsoft Advanced Streaming Format (ASF) 0 belong 0x3026b275 Microsoft ASF +!:mime video/x-ms-asf # MNG Video Format, 0 string \x8aMNG MNG video data, @@ -718,16 +734,16 @@ 3 string \x0D\x0AVersion:Vivo Vivo video data # VRML (Virtual Reality Modelling Language) -0 string/b #VRML\ V1.0\ ascii VRML 1 file +0 string/w #VRML\ V1.0\ ascii VRML 1 file !:mime model/vrml -0 string/b #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file +0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file !:mime model/vrml # X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd] # From Michel Briand -0 string \20 search/1000/cb \20 search/1000/cw \ 2008-07-18 0 string BIK Bink Video >3 regex =[a-z] rev.%s @@ -813,3 +830,66 @@ >>51 byte&0x20 !0 stereo #>>51 byte&0x10 0 FFT #>>51 byte&0x10 !0 DCT + +# Type: NUT Container +# URL: http://wiki.multimedia.cx/index.php?title=NUT +# From: Adam Buchbinder +0 string nut/multimedia\ container\0 NUT multimedia container + +# Type: Nullsoft Video (NSV) +# URL: http://wiki.multimedia.cx/index.php?title=Nullsoft_Video +# From: Mike Melanson +0 string NSVf Nullsoft Video + +# Type: REDCode Video +# URL: http://www.red.com/ ; http://wiki.multimedia.cx/index.php?title=REDCode +# From: Mike Melanson +4 string RED1 REDCode Video + +# Type: MTV Multimedia File +# URL: http://wiki.multimedia.cx/index.php?title=MTV +# From: Mike Melanson +0 string AMVS MTV Multimedia File + +# Type: ARMovie +# URL: http://wiki.multimedia.cx/index.php?title=ARMovie +# From: Mike Melanson +0 string ARMovie\012 ARMovie + +# Type: Interplay MVE Movie +# URL: http://wiki.multimedia.cx/index.php?title=Interplay_MVE +# From: Mike Melanson +0 string Interplay\040MVE\040File\032 Interplay MVE Movie + +# Type: Windows Television DVR File +# URL: http://wiki.multimedia.cx/index.php?title=WTV +# From: Mike Melanson +# This takes the form of a Windows-style GUID +0 bequad 0xB7D800203749DA11 +>8 bequad 0xA64E0007E95EAD8D Windows Television DVR Media + +# Type: Sega FILM/CPK Multimedia +# URL: http://wiki.multimedia.cx/index.php?title=Sega_FILM +# From: Mike Melanson +0 string FILM Sega FILM/CPK Multimedia, +>32 belong x %d x +>28 belong x %d + +# Type: Nintendo THP Multimedia +# URL: http://wiki.multimedia.cx/index.php?title=THP +# From: Mike Melanson +0 string THP\0 Nintendo THP Multimedia + +# Type: BBC Dirac Video +# URL: http://wiki.multimedia.cx/index.php?title=Dirac +# From: Mike Melanson +0 string BBCD BBC Dirac Video + +# 
Type: RAD Game Tools Smacker Multimedia +# URL: http://wiki.multimedia.cx/index.php?title=Smacker +# From: Mike Melanson +0 string SMK RAD Game Tools Smacker Multimedia +>3 byte x version %c, +>4 lelong x %d x +>8 lelong x %d, +>12 lelong x %d frames diff --git a/contrib/file/Magdir/apl b/contrib/file/Magdir/apl index 040043191938..d717e377dc3e 100644 --- a/contrib/file/Magdir/apl +++ b/contrib/file/Magdir/apl @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: apl,v 1.6 2009/09/19 16:28:07 christos Exp $ # apl: file(1) magic for APL (see also "pdp" and "vax" for other APL # workspaces) # diff --git a/contrib/file/Magdir/apple b/contrib/file/Magdir/apple index 669ab04dcc6f..dad3eee9258d 100644 --- a/contrib/file/Magdir/apple +++ b/contrib/file/Magdir/apple @@ -1,7 +1,9 @@ + #------------------------------------------------------------------------------ +# $File: apple,v 1.24 2010/11/25 15:00:12 christos Exp $ # apple: file(1) magic for Apple file formats # -0 search/1 FiLeStArTfIlEsTaRt binscii (apple ][) text +0 search/1/t FiLeStArTfIlEsTaRt binscii (apple ][) text 0 string \x0aGL Binary II (apple ][) data 0 string \x76\xff Squeezed (apple ][) data 0 string NuFile NuFile archive (apple ][) data diff --git a/contrib/file/Magdir/applix b/contrib/file/Magdir/applix index 9d348d152450..f3f362eec773 100644 --- a/contrib/file/Magdir/applix +++ b/contrib/file/Magdir/applix @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: applix,v 1.5 2009/09/19 16:28:08 christos Exp $ # applix: file(1) magic for Applixware # From: Peter Soos # diff --git a/contrib/file/Magdir/archive b/contrib/file/Magdir/archive index b75fac0838b5..2ff3323c40e4 100644 --- a/contrib/file/Magdir/archive +++ b/contrib/file/Magdir/archive @@ -1,4 +1,5 @@ #------------------------------------------------------------------------------ +# $File: archive,v 1.70 2011/10/26 15:44:47 christos Exp $ # archive: file(1) magic for archive formats (see also "msdos" for self- # extracting compressed archives) # @@ -11,6 +12,11 @@ 257 string ustar\040\040\0 GNU tar archive !:mime application/x-tar # encoding: gnu +# Incremental snapshot gnu-tar format from: +# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html +0 string GNU\ tar- GNU tar incremental snapshot data +>&0 regex [0-9]\.[0-9]+-[0-9]+ version %s + # cpio archives # # Yes, the top two "cpio archive" formats *are* supposed to just be "short". 
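The incremental-snapshot entry added above is easy to exercise locally, since file(1) accepts uncompiled magic source via -m and compiles a single fragment on the fly. A minimal sketch, assuming GNU tar is available as gtar and the commands are run from the top of the source tree; the file names and the reported version are only examples:

  $ gtar --listed-incremental=backup.snar -cf /dev/null ./some-directory
  $ file -m contrib/file/Magdir/archive backup.snar
  backup.snar: GNU tar incremental snapshot data version 1.26-2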
@@ -182,7 +188,7 @@ # MAR 2 string =-ah MAR archive data # ACB -0 belong&0x00f800ff 0x00800000 ACB archive data +#0 belong&0x00f800ff 0x00800000 ACB archive data # CPZ # TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data # JRC @@ -243,13 +249,13 @@ # MS Compress 4 string \x88\xf0\x27 MS Compress archive data # updated by Joerg Jenderek ->9 string \0 ->>0 string KWAJ +>9 string \0 +>>0 string KWAJ >>>7 string \321\003 MS Compress archive data >>>>14 ulong >0 \b, original size: %ld bytes ->>>>18 ubyte >0x65 ->>>>>18 string x \b, was %.8s ->>>>>(10.b-4) string x \b.%.3s +>>>>18 ubyte >0x65 +>>>>>18 string x \b, was %.8s +>>>>>(10.b-4) string x \b.%.3s # MP3 (archiver, not lossy audio compression) 0 string MP3\x1a MP3-Archiver archive data # ZET @@ -274,7 +280,7 @@ # Splint 0 string \x93\xb9\x06 Splint archive data # InstallShield -0 string \x13\x5d\x65\x8c InstallShield Z archive Data +0 string \x13\x5d\x65\x8c InstallShield Z archive Data # Gather 1 string GTH Gather archive data # BOA @@ -533,7 +539,7 @@ >20 byte x - header level %d # taken from idarc [JW] 2 string -lZ PUT archive data -2 string -lz LZS archive data +2 string -lz LZS archive data 2 string -sw1- Swag archive data # RAR archiver (Greg Roelofs, newt@uchicago.edu) @@ -560,79 +566,140 @@ # [JW] see exe section for self-extracting version 0 string UC2\x1a UC2 archive data -# ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) +# PKZIP multi-volume archive +0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract +!:mime application/zip + +# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) 0 string PK\003\004 ->4 byte 0x00 Zip archive data -!:mime application/zip ->4 byte 0x09 Zip archive data, at least v0.9 to extract -!:mime application/zip ->4 byte 0x0a Zip archive data, at least v1.0 to extract -!:mime application/zip ->4 byte 0x0b Zip archive data, at least v1.1 to extract -!:mime application/zip ->0x161 string WINZIP Zip archive data, WinZIP self-extracting -!:mime application/zip ->4 byte 0x14 ->>30 ubelong !0x6d696d65 Zip archive data, at least v2.0 to extract -!:mime application/zip -# OpenOffice.org / KOffice / StarOffice documents -# Listed here because they ARE zip files -# -# From: Abel Cheung ->4 byte 0x14 ->>30 string mimetype +# Specialised zip formats which start with a member named 'mimetype' +# (stored uncompressed, with no 'extra field') containing the file's MIME type. +# Check for have 8-byte name, 0-byte extra field, name "mimetype", and +# contents starting with "application/": +>26 string \x8\0\0\0mimetypeapplication/ -# KOffice (1.2 or above) formats ->>>50 string vnd.kde. KOffice (>=1.2) ->>>>58 string karbon Karbon document ->>>>58 string kchart KChart document ->>>>58 string kformula KFormula document ->>>>58 string kivio Kivio document ->>>>58 string kontour Kontour document ->>>>58 string kpresenter KPresenter document ->>>>58 string kspread KSpread document ->>>>58 string kword KWord document +# KOffice / OpenOffice & StarOffice / OpenDocument formats +# From: Abel Cheung -# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7) ->>>50 string vnd.sun.xml. 
OpenOffice.org 1.x ->>>>62 string writer Writer ->>>>>68 byte !0x2e document ->>>>>68 string .template template ->>>>>68 string .global global document ->>>>62 string calc Calc ->>>>>66 byte !0x2e spreadsheet ->>>>>66 string .template template ->>>>62 string draw Draw ->>>>>66 byte !0x2e document ->>>>>66 string .template template ->>>>62 string impress Impress ->>>>>69 byte !0x2e presentation ->>>>>69 string .template template ->>>>62 string math Math document ->>>>62 string base Database file +# KOffice (1.2 or above) formats +# (mimetype contains "application/vnd.kde.") +>>50 string vnd.kde. KOffice (>=1.2) +>>>58 string karbon Karbon document +>>>58 string kchart KChart document +>>>58 string kformula KFormula document +>>>58 string kivio Kivio document +>>>58 string kontour Kontour document +>>>58 string kpresenter KPresenter document +>>>58 string kspread KSpread document +>>>58 string kword KWord document -# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) -# http://lists.oasis-open.org/archives/office/200505/msg00006.html ->>>50 string vnd.oasis.opendocument. OpenDocument ->>>>73 string text ->>>>>77 byte !0x2d Text +# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7) +# (mimetype contains "application/vnd.sun.xml.") +>>50 string vnd.sun.xml. OpenOffice.org 1.x +>>>62 string writer Writer +>>>>68 byte !0x2e document +>>>>68 string .template template +>>>>68 string .global global document +>>>62 string calc Calc +>>>>66 byte !0x2e spreadsheet +>>>>66 string .template template +>>>62 string draw Draw +>>>>66 byte !0x2e document +>>>>66 string .template template +>>>62 string impress Impress +>>>>69 byte !0x2e presentation +>>>>69 string .template template +>>>62 string math Math document +>>>62 string base Database file + +# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8) +# http://lists.oasis-open.org/archives/office/200505/msg00006.html +# (mimetype contains "application/vnd.oasis.opendocument.") +>>50 string vnd.oasis.opendocument. 
OpenDocument +>>>73 string text +>>>>77 byte !0x2d Text !:mime application/vnd.oasis.opendocument.text ->>>>>77 string -template Text Template ->>>>>77 string -web HTML Document Template ->>>>>77 string -master Master Document ->>>>73 string graphics Drawing ->>>>>81 string -template Template ->>>>73 string presentation Presentation ->>>>>85 string -template Template ->>>>73 string spreadsheet Spreadsheet ->>>>>84 string -template Template ->>>>73 string chart Chart ->>>>>78 string -template Template ->>>>73 string formula Formula ->>>>>80 string -template Template ->>>>73 string database Database ->>>>73 string image Image +>>>>77 string -template Text Template +!:mime application/vnd.oasis.opendocument.text-template +>>>>77 string -web HTML Document Template +!:mime application/vnd.oasis.opendocument.text-web +>>>>77 string -master Master Document +!:mime application/vnd.oasis.opendocument.text-master +>>>73 string graphics +>>>>81 byte !0x2d Drawing +!:mime application/vnd.oasis.opendocument.graphics +>>>>81 string -template Template +!:mime application/vnd.oasis.opendocument.graphics-template +>>>73 string presentation +>>>>85 byte !0x2d Presentation +!:mime application/vnd.oasis.opendocument.presentation +>>>>85 string -template Template +!:mime application/vnd.oasis.opendocument.presentation-template +>>>73 string spreadsheet +>>>>84 byte !0x2d Spreadsheet +!:mime application/vnd.oasis.opendocument.spreadsheet +>>>>84 string -template Template +!:mime application/vnd.oasis.opendocument.spreadsheet-template +>>>73 string chart +>>>>78 byte !0x2d Chart +!:mime application/vnd.oasis.opendocument.chart +>>>>78 string -template Template +!:mime application/vnd.oasis.opendocument.chart-template +>>>73 string formula +>>>>80 byte !0x2d Formula +!:mime application/vnd.oasis.opendocument.formula +>>>>80 string -template Template +!:mime application/vnd.oasis.opendocument.formula-template +>>>73 string database Database +!:mime application/vnd.oasis.opendocument.database +>>>73 string image +>>>>78 byte !0x2d Image +!:mime application/vnd.oasis.opendocument.image +>>>>78 string -template Template +!:mime application/vnd.oasis.opendocument.image-template + +# EPUB (OEBPS) books using OCF (OEBPS Container Format) +# From: Adam Buchbinder +# http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4. +# (mimetype contains "application/epub+zip") +>>50 string epub+zip EPUB ebook data +!:mime application/epub+zip + +# Catch other ZIP-with-mimetype formats +# In a ZIP file, the bytes immediately after a member's contents are +# always "PK". The 2 regex rules here print the "mimetype" member's +# contents up to the first 'P'. Luckily, most MIME types don't contain +# any capital 'P's. This is a kludge. +# (mimetype contains "application/") +>>50 string !epub+zip +>>>50 string !vnd.oasis.opendocument. +>>>>50 string !vnd.sun.xml. +>>>>>50 string !vnd.kde. +>>>>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) +!:mime application/zip +# (mimetype contents other than "application/*") +>26 string \x8\0\0\0mimetype +>>38 string !application/ +>>>38 regex [!-OQ-~]+ Zip data (MIME type "%s"?) 
+!:mime application/zip + +# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu) +# Next line excludes specialized formats: +>26 string !\x8\0\0\0mimetype Zip archive data +!:mime application/zip +>>4 byte 0x09 \b, at least v0.9 to extract +>>4 byte 0x0a \b, at least v1.0 to extract +>>4 byte 0x0b \b, at least v1.1 to extract +>>4 byte 0x14 \b, at least v2.0 to extract +>>4 byte 0x2d \b, at least v3.0 to extract +>>0x161 string WINZIP \b, WinZIP self-extracting + +# StarView Metafile +# From Pierre Ducroquet +0 string VCLMTF StarView MetaFile +>6 beshort x \b, version %d +>8 belong x \b, size %d # Zoo archiver 20 lelong 0xfdc4a7dc Zoo archive data @@ -651,7 +718,7 @@ !:mime application/octet-stream # -# LBR. NB: May conflict with the questionable +# LBR. NB: May conflict with the questionable # "binary Computer Graphics Metafile" format. # 0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data @@ -667,10 +734,10 @@ # From Rafael Laboissiere # The Project Revision Control System (see # http://prcs.sourceforge.net) generates a packaged project -# file which is recognized by the following entry: +# file which is recognized by the following entry: 0 leshort 0xeb81 PRCS packaged project -# Microsoft cabinets +# Microsoft cabinets # by David Necas (Yeti) #0 string MSCF\0\0\0\0 Microsoft cabinet file data, #>25 byte x v%d @@ -678,7 +745,7 @@ # MPi: All CABs have version 1.3, so this is pointless. # Better magic in debian-additions. -# GTKtalog catalogs +# GTKtalog catalogs # by David Necas (Yeti) 4 string gtktalog\ GTKtalog catalog data, >13 string 3 version 3 @@ -697,12 +764,12 @@ !:mime application/x-bittorrent # Atari MSA archive - Teemu Hukkanen -0 beshort 0x0e0f Atari MSA archive data ->2 beshort x \b, %d sectors per track ->4 beshort 0 \b, 1 sided ->4 beshort 1 \b, 2 sided ->6 beshort x \b, starting track: %d ->8 beshort x \b, ending track: %d +0 beshort 0x0e0f Atari MSA archive data +>2 beshort x \b, %d sectors per track +>4 beshort 0 \b, 1 sided +>4 beshort 1 \b, 2 sided +>6 beshort x \b, starting track: %d +>8 beshort x \b, ending track: %d # Alternate ZIP string (amc@arwen.cs.berkeley.edu) 0 string PK00PK\003\004 Zip archive data @@ -747,7 +814,7 @@ # DR-DOS 7.03 Packed File *.??_ 0 string Packed\ File\ Personal NetWare Packed File ->12 string x \b, was "%.12s" +>12 string x \b, was "%.12s" # EET archive # From: Tilman Sauerbeck @@ -803,3 +870,38 @@ >24 belong 1 SHA-1 checksum >24 belong 2 MD5 checksum +# Type: Parity Archive +# From: Daniel van Eeden +0 string PAR2 Parity Archive Volume Set + +# Bacula volume format. (Volumes always start with a block header.) +# URL: http://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html +# From: Adam Buchbinder +12 string BB02 Bacula volume +>20 bedate x \b, started %s + +# ePub is XHTML + XML inside a ZIP archive. 
The first member of the +# archive must be an uncompressed file called 'mimetype' with contents +# 'application/epub+zip' + +# start by checking that this is a ZIP archive, then check for the +# proper mimetype file +# From: Ralf Brown +0 string PK\003\004 +>0x1E string mimetypeapplication/epub+zip EPUB document +!:mime application/epub+zip + +# From: "MichaÅ‚ Górny" +# ZPAQ: http://mattmahoney.net/dc/zpaq.html +0 string zPQ ZPAQ stream +>3 byte x \b, level %d + +# BBeB ebook, unencrypted (LRF format) +# URL: http://www.sven.de/librie/Librie/LrfFormat +# From: Adam Buchbinder +0 string L\0R\0F\0\0\0 BBeB ebook data, unencrypted +>8 beshort x \b, version %d +>36 byte 1 \b, front-to-back +>36 byte 16 \b, back-to-front +>42 beshort x \b, (%dx, +>44 beshort x %d) diff --git a/contrib/file/Magdir/assembler b/contrib/file/Magdir/assembler new file mode 100644 index 000000000000..90aa4ca65c0f --- /dev/null +++ b/contrib/file/Magdir/assembler @@ -0,0 +1,14 @@ +#------------------------------------------------------------------------------ +# $File: assembler,v 1.1 2011/12/08 12:12:46 rrt Exp $ +# make: file(1) magic for assembler source +# +0 regex \^\.asciiz\? assembler source text +!:mime text/x-asm +0 regex \^\.byte assembler source text +!:mime text/x-asm +0 regex \^\.even assembler source text +!:mime text/x-asm +0 regex \^\.globl assembler source text +!:mime text/x-asm +0 regex \^\.text assembler source text +!:mime text/x-asm diff --git a/contrib/file/Magdir/asterix b/contrib/file/Magdir/asterix index d89504a2407f..a9ea885cdb8c 100644 --- a/contrib/file/Magdir/asterix +++ b/contrib/file/Magdir/asterix @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: asterix,v 1.5 2009/09/19 16:28:08 christos Exp $ # asterix: file(1) magic for Aster*x; SunOS 5.5.1 gave the 4-character # strings as "long" - we assume they're just strings: # From: guy@netapp.com (Guy Harris) diff --git a/contrib/file/Magdir/att3b b/contrib/file/Magdir/att3b index 884ad4aa4332..96880111e057 100644 --- a/contrib/file/Magdir/att3b +++ b/contrib/file/Magdir/att3b @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: att3b,v 1.8 2009/09/19 16:28:08 christos Exp $ # att3b: file(1) magic for AT&T 3B machines # # The `versions' should be un-commented if they work for you. 
diff --git a/contrib/file/Magdir/audio b/contrib/file/Magdir/audio index 04f80eb47ca3..0cbe25903280 100644 --- a/contrib/file/Magdir/audio +++ b/contrib/file/Magdir/audio @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: audio,v 1.64 2012/02/20 16:37:34 christos Exp $ # audio: file(1) magic for sound formats (see also "iff") # # Jan Nicolai Langfeldt (janl@ifi.uio.no), Dan Quinlan (quinlan@yggdrasil.com), @@ -116,7 +118,7 @@ # Real Audio (Magic .ra\0375) 0 belong 0x2e7261fd RealAudio sound file !:mime audio/x-pn-realaudio -0 string .RMF RealMedia file +0 string .RMF\0\0\0 RealMedia file !:mime application/vnd.rn-realmedia #video/x-pn-realvideo #video/vnd.rn-realvideo @@ -250,12 +252,17 @@ >0x36 string >\0 author: "%s" >0x56 string >\0 copyright: "%s" -# IRCAM -# VAX and MIPS files are little-endian; Sun and NeXT are big-endian -0 belong 0x64a30100 IRCAM file (VAX) -0 belong 0x64a30200 IRCAM file (Sun) +# IRCAM sound files - Michael Pruett +# http://www-mmsp.ece.mcgill.ca/documents/AudioFormats/IRCAM/IRCAM.html +0 belong 0x64a30100 IRCAM file (VAX little-endian) +0 belong 0x0001a364 IRCAM file (VAX big-endian) +0 belong 0x64a30200 IRCAM file (Sun big-endian) +0 belong 0x0002a364 IRCAM file (Sun little-endian) 0 belong 0x64a30300 IRCAM file (MIPS little-endian) -0 belong 0x64a30400 IRCAM file (NeXT) +0 belong 0x0003a364 IRCAM file (MIPS big-endian) +0 belong 0x64a30400 IRCAM file (NeXT big-endian) +0 belong 0x64a30400 IRCAM file (NeXT big-endian) +0 belong 0x0004a364 IRCAM file (NeXT little-endian) # NIST SPHERE 0 string NIST_1A\n\ \ \ 1024\n NIST SPHERE file @@ -276,8 +283,8 @@ >21 ubyte <128 note %d, >22 byte =0 replay 5.485 KHz >22 byte =1 replay 8.084 KHz ->22 byte =2 replay 10.971 Khz ->22 byte =3 replay 16.168 Khz +>22 byte =2 replay 10.971 KHz +>22 byte =3 replay 16.168 KHz >22 byte =4 replay 21.942 KHz >22 byte =5 replay 32.336 KHz >22 byte =6 replay 43.885 KHz @@ -306,6 +313,15 @@ >122 byte&0x1 =1 PAL >122 byte&0x1 =0 NTSC +# Type: SNES SPC700 sound files +# From: Josh Triplett +0 string SNES-SPC700\ Sound\ File\ Data\ v SNES SPC700 sound file +>&0 string 0.30 \b, version %s +>>0x23 byte 0x1B \b, without ID666 tag +>>0x23 byte 0x1A \b, with ID666 tag +>>>0x2E string >\0 \b, song "%.32s" +>>>0x4E string >\0 \b, game "%.32s" + # Impulse tracker module (audio/x-it) 0 string IMPM Impulse Tracker module sound data - !:mime audio/x-mod @@ -475,6 +491,7 @@ # From danny.milo@gmx.net (Danny Milosavljevic) # New version from Abel Cheung 0 string MAC\040 Monkey's Audio compressed format +!:mime audio/x-ape >4 uleshort >0x0F8B version %d >>(0x08.l) uleshort =1000 with fast compression >>(0x08.l) uleshort =2000 with normal compression @@ -604,3 +621,18 @@ # URL: http://filext.com/detaillist.php?extdetail=AMR # From: Russell Coker 0 string #!AMR Adaptive Multi-Rate Codec (GSM telephony) + +# Type: SuperCollider 3 Synth Definition File Format +# From: Mario Lang +0 string SCgf SuperCollider3 Synth Definition file, +>4 belong x version %d + +# Type: True Audio Lossless Audio +# URL: http://wiki.multimedia.cx/index.php?title=True_Audio +# From: Mike Melanson +0 string TTA1 True Audio Lossless Audio + +# Type: WavPack Lossless Audio +# URL: http://wiki.multimedia.cx/index.php?title=WavPack +# From: Mike Melanson +0 string wvpk WavPack Lossless Audio diff --git a/contrib/file/Magdir/basis b/contrib/file/Magdir/basis index 1813c0e0f0a7..128aa4c77c2d 100644 --- a/contrib/file/Magdir/basis +++ b/contrib/file/Magdir/basis @@ -1,4 +1,6 @@ + 
#---------------------------------------------------------------- +# $File: basis,v 1.4 2009/09/19 16:28:08 christos Exp $ # basis: file(1) magic for BBx/Pro5-files # Oliver Dammer 2005/11/07 # http://www.basis.com business-basic-files. diff --git a/contrib/file/Magdir/bflt b/contrib/file/Magdir/bflt index 5e288d130b07..03eb59d34ae1 100644 --- a/contrib/file/Magdir/bflt +++ b/contrib/file/Magdir/bflt @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: bflt,v 1.4 2009/09/19 16:28:08 christos Exp $ # bFLT: file(1) magic for BFLT uclinux binary files # # From Philippe De Muyter diff --git a/contrib/file/Magdir/blcr b/contrib/file/Magdir/blcr new file mode 100644 index 000000000000..70ea563e182d --- /dev/null +++ b/contrib/file/Magdir/blcr @@ -0,0 +1,25 @@ +# Berkeley Lab Checkpoint Restart (BLCR) checkpoint context files +# http://ftg.lbl.gov/checkpoint +0 string C\0\0\0R\0\0\0 BLCR +>16 lelong 1 x86 +>16 lelong 3 alpha +>16 lelong 5 x86-64 +>16 lelong 7 ARM +>8 lelong x context data (little endian, version %d) +# Uncomment the following only of your "file" program supports "search" +#>0 search/1024 VMA\06 for kernel +#>>&1 byte x %d. +#>>&2 byte x %d. +#>>&3 byte x %d +0 string \0\0\0C\0\0\0R BLCR +>16 belong 2 SPARC +>16 belong 4 ppc +>16 belong 6 ppc64 +>16 belong 7 ARMEB +>16 belong 8 SPARC64 +>8 belong x context data (big endian, version %d) +# Uncomment the following only of your "file" program supports "search" +#>0 search/1024 VMA\06 for kernel +#>>&1 byte x %d. +#>>&2 byte x \b%d. +#>>&3 byte x \b%d diff --git a/contrib/file/Magdir/blender b/contrib/file/Magdir/blender index 6ef79507ef9b..1814738ab4a2 100644 --- a/contrib/file/Magdir/blender +++ b/contrib/file/Magdir/blender @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: blender,v 1.5 2009/09/19 16:28:08 christos Exp $ # blender: file(1) magic for Blender 3D related files # # Native format rule v1.2. For questions use the developers list diff --git a/contrib/file/Magdir/blit b/contrib/file/Magdir/blit index 7a470ed4a669..d5b687fce60b 100644 --- a/contrib/file/Magdir/blit +++ b/contrib/file/Magdir/blit @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: blit,v 1.8 2009/09/19 16:28:08 christos Exp $ # blit: file(1) magic for 68K Blit stuff as seen from 680x0 machine # # Note that this 0407 conflicts with several other a.out formats... 
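The BLCR fragment added above keys on an 8-byte signature, a version word at offset 8, and an architecture code at offset 16 (with a mirrored big-endian variant). A synthetic header shows how the pieces combine; this is only a sketch, it assumes a printf that honors octal escapes, and the byte values are illustrative rather than taken from a real checkpoint file:

  $ printf 'C\0\0\0R\0\0\0\1\0\0\0\0\0\0\0\5\0\0\0' > ctx.blcr
  $ file -m contrib/file/Magdir/blcr ctx.blcr
  ctx.blcr: BLCR x86-64 context data (little endian, version 1)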
diff --git a/contrib/file/Magdir/bout b/contrib/file/Magdir/bout index 4cd6f76ef540..693cc2a4bd81 100644 --- a/contrib/file/Magdir/bout +++ b/contrib/file/Magdir/bout @@ -1,4 +1,6 @@ -# + +#------------------------------------------------------------------------------ +# $File: bout,v 1.5 2009/09/19 16:28:08 christos Exp $ # i80960 b.out objects and archives # 0 long 0x10d i960 b.out relocatable object diff --git a/contrib/file/Magdir/bsdi b/contrib/file/Magdir/bsdi index ad547599ba83..be16e3a1a23b 100644 --- a/contrib/file/Magdir/bsdi +++ b/contrib/file/Magdir/bsdi @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: bsdi,v 1.5 2009/09/19 16:28:08 christos Exp $ # bsdi: file(1) magic for BSD/OS (from BSDI) objects # diff --git a/contrib/file/Magdir/bsi b/contrib/file/Magdir/bsi new file mode 100644 index 000000000000..51a62891c2c8 --- /dev/null +++ b/contrib/file/Magdir/bsi @@ -0,0 +1,9 @@ +# Chiasmus is a encryption standard developed by the German Federal +# Office for Information Security (Bundesamt fuer Sicherheit in der +# Informationstechnik). + +# Extension: .xia +0 string XIA1 Chiasmus encrypted data + +# Extension: .xis +0 string XIS Chiasmus key diff --git a/contrib/file/Magdir/btsnoop b/contrib/file/Magdir/btsnoop index 85d867a446d4..d72daad877d7 100644 --- a/contrib/file/Magdir/btsnoop +++ b/contrib/file/Magdir/btsnoop @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: btsnoop,v 1.5 2009/09/19 16:28:08 christos Exp $ # BTSnoop: file(1) magic for BTSnoop files # # From diff --git a/contrib/file/Magdir/c-lang b/contrib/file/Magdir/c-lang index 895e37f656bc..066562760a51 100644 --- a/contrib/file/Magdir/c-lang +++ b/contrib/file/Magdir/c-lang @@ -1,19 +1,49 @@ #------------------------------------------------------------------------------ -# c-lang: file(1) magic for C programs (or REXX) +# $File: c-lang,v 1.16 2011/12/09 08:02:16 rrt Exp $ +# c-lang: file(1) magic for C and related languages programs # -# XPM icons (Greg Roelofs, newt@uchicago.edu) -# if you uncomment "/*" for C/REXX below, also uncomment this entry -#0 string /*\ XPM\ */ X pixmap image data -#!:mime image/x-xpmi +# BCPL +0 search/8192 "libhdr" BCPL source text +!:mime text/x-bcpl +0 search/8192 "LIBHDR" BCPL source text +!:mime text/x-bcpl -# 3DS (3d Studio files) Conflicts with diff output 0x3d '=' -#16 beshort 0x3d3d image/x-3ds +# C +0 regex \^#include C source text +!:mime text/x-c +0 regex \^char C source text +!:mime text/x-c +0 regex \^double C source text +!:mime text/x-c +0 regex \^extern C source text +!:mime text/x-c +0 regex \^float C source text +!:mime text/x-c +0 regex \^struct C source text +!:mime text/x-c +0 regex \^union C source text +!:mime text/x-c +0 search/8192 main( C source text +!:mime text/x-c -# this first will upset you if you're a PL/1 shop... 
-# in which case rm it; ascmagic will catch real C programs -#0 search/1 /* C or REXX program text -#0 search/1 // C++ program text +# C++ +# The strength of these rules is increased so they beat the C rules above +0 regex \^template C++ source text +!:strength + 10 +!:mime text/x-c++ +0 regex \^virtual C++ source text +!:strength + 10 +!:mime text/x-c++ +0 regex \^class C++ source text +!:strength + 10 +!:mime text/x-c++ +0 regex \^public: C++ source text +!:strength + 10 +!:mime text/x-c++ +0 regex \^private: C++ source text +!:strength + 10 +!:mime text/x-c++ # From: Mikhail Teterin 0 string cscope cscope reference data diff --git a/contrib/file/Magdir/c64 b/contrib/file/Magdir/c64 index f8a8b76c6b3b..eea3e319101a 100644 --- a/contrib/file/Magdir/c64 +++ b/contrib/file/Magdir/c64 @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: c64,v 1.5 2009/09/19 16:28:08 christos Exp $ # c64: file(1) magic for various commodore 64 related files # # From: Dirk Jagdmann diff --git a/contrib/file/Magdir/cad b/contrib/file/Magdir/cad index 757dab6be089..cc4ef343d822 100644 --- a/contrib/file/Magdir/cad +++ b/contrib/file/Magdir/cad @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: cad,v 1.11 2011/12/08 12:12:46 rrt Exp $ # autocad: file(1) magic for cad files # @@ -50,9 +51,54 @@ # AutoCad, from Nahuel Greco # AutoCAD DWG versions R12/R13/R14 (www.autodesk.com) -0 string AC1012 AutoCad (release 12) -0 string AC1013 AutoCad (release 13) -0 string AC1014 AutoCad (release 14) +0 string AC1012 DWG AutoDesk AutoCad (release 12) +0 string AC1013 DWG AutoDesk AutoCad (release 13) +0 string AC1014 DWG AutoDesk AutoCad (release 14) +# A new version of AutoCAD DWG +# Sergey Zaykov (mail_of_sergey@mail.ru, sergey_zaikov@rambler.ru, +# ICQ 358572321) +# From various sources like: +# http://autodesk.blogs.com/between_the_lines/autocad-release-history.html +0 string AC1018 DWG AutoDesk AutoCAD 2004/2005/2006 +0 string AC1021 DWG AutoDesk AutoCAD 2007/2008/2009 +0 string AC1024 DWG AutoDesk AutoCAD 2010/2011 + +# KOMPAS 2D drawing from ASCON +# This is KOMPAS 2D drawing or fragment of drawing but is not detailed nor +# gathered nor specification +# ASCON http://ascon.net/main/ in English, +# http://ascon.ru/ main site in Russian +# Extension is CDW for drawing and FRW for fragment of drawing +# Sergey Zaykov (mail_of_sergey@mail.ru, sergey_zaikov@rambler.ru, +# ICQ 358572321, http://vkontakte.ru/id16076543) +# From: +# http://sd.ascon.ru/otrs/customer.pl?Action=CustomerFAQ&CategoryID=4&ItemID=292 +# (in russian) and my experiments +0 string KF +>2 belong 0x4E00000C Kompas drawing 12.0 SP1 +>2 belong 0x4D00000C Kompas drawing 12.0 +>2 belong 0x3200000B Kompas drawing 11.0 SP1 +>2 belong 0x3100000B Kompas drawing 11.0 +>2 belong 0x2310000A Kompas drawing 10.0 SP1 +>2 belong 0x2110000A Kompas drawing 10.0 +>2 belong 0x08000009 Kompas drawing 9.0 SP1 +>2 belong 0x05000009 Kompas drawing 9.0 +>2 belong 0x33010008 Kompas drawing 8+ +>2 belong 0x1A000008 Kompas drawing 8.0 +>2 belong 0x2C010107 Kompas drawing 7+ +>2 belong 0x05000007 Kompas drawing 7.0 +>2 belong 0x32000006 Kompas drawing 6+ +>2 belong 0x09000006 Kompas drawing 6.0 +>2 belong 0x5C009005 Kompas drawing 5.11R03 +>2 belong 0x54009005 Kompas drawing 5.11R02 +>2 belong 0x51009005 Kompas drawing 5.11R01 +>2 belong 0x22009005 Kompas drawing 5.10R03 +>2 belong 0x22009005 Kompas drawing 5.10R02 mar +>2 belong 0x21009005 Kompas drawing 5.10R02 febr +>2 
belong 0x19009005 Kompas drawing 5.10R01 +>2 belong 0xF4008005 Kompas drawing 5.9R01.003 +>2 belong 0x1C008005 Kompas drawing 5.9R01.002 +>2 belong 0x11008005 Kompas drawing 5.8R01.003 # CAD: file(1) magic for computer aided design files # Phillip Griffith @@ -67,3 +113,6 @@ 0 string AC1012 AutoDesk AutoCAD R13 0 string AC1014 AutoDesk AutoCAD R14 0 string AC1015 AutoDesk AutoCAD R2000 + +# 3DS (3d Studio files) Conflicts with diff output 0x3d '=' +#16 beshort 0x3d3d image/x-3ds diff --git a/contrib/file/Magdir/cafebabe b/contrib/file/Magdir/cafebabe index e25c974dd55b..6400e2f3b0e2 100644 --- a/contrib/file/Magdir/cafebabe +++ b/contrib/file/Magdir/cafebabe @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: cafebabe,v 1.8 2009/09/19 16:28:08 christos Exp $ # Cafe Babes unite! # # Since Java bytecode and Mach-O fat-files have the same magic number, the test diff --git a/contrib/file/Magdir/cddb b/contrib/file/Magdir/cddb index 42ca416bd89b..5d8a8517e276 100644 --- a/contrib/file/Magdir/cddb +++ b/contrib/file/Magdir/cddb @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: cddb,v 1.4 2009/09/19 16:28:08 christos Exp $ # CDDB: file(1) magic for CDDB(tm) format CD text data files # # From @@ -7,4 +9,4 @@ # CDDB-enabled CD player applications. # -0 search/1/b #\040xmcd CDDB(tm) format CD text data +0 search/1/w #\040xmcd CDDB(tm) format CD text data diff --git a/contrib/file/Magdir/chord b/contrib/file/Magdir/chord index 3639f3c4917e..00d0bec65a5e 100644 --- a/contrib/file/Magdir/chord +++ b/contrib/file/Magdir/chord @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: chord,v 1.5 2010/09/20 19:19:16 rrt Exp $ # chord: file(1) magic for Chord music sheet typesetting utility input files # # From Philippe De Muyter @@ -7,3 +8,8 @@ # 0 string {title Chord text file +# Type: PowerTab file format +# URL: http://www.power-tab.net/ +# From: Jelmer Vernooij +0 string ptab\003\000 Power-Tab v3 Tablature File +0 string ptab\004\000 Power-Tab v4 Tablature File diff --git a/contrib/file/Magdir/cisco b/contrib/file/Magdir/cisco index 77e3efb5e39d..0279bbb5b5af 100644 --- a/contrib/file/Magdir/cisco +++ b/contrib/file/Magdir/cisco @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: cisco,v 1.4 2009/09/19 16:28:08 christos Exp $ # cisco: file(1) magic for cisco Systems routers # # Most cisco file-formats are covered by the generic elf code diff --git a/contrib/file/Magdir/citrus b/contrib/file/Magdir/citrus index 6d4479455391..ff2471ea75ac 100644 --- a/contrib/file/Magdir/citrus +++ b/contrib/file/Magdir/citrus @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: citrus,v 1.4 2009/09/19 16:28:08 christos Exp $ # citrus locale declaration # diff --git a/contrib/file/Magdir/clarion b/contrib/file/Magdir/clarion index 5f5f6e75613c..cff7a3bdd0e0 100644 --- a/contrib/file/Magdir/clarion +++ b/contrib/file/Magdir/clarion @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: clarion,v 1.4 2009/09/19 16:28:08 christos Exp $ # clarion: file(1) magic for # Clarion Personal/Professional Developer # (v2 and above) # From: Julien Blache diff --git a/contrib/file/Magdir/claris b/contrib/file/Magdir/claris index 368b473260ec..a2b120a834c5 100644 --- a/contrib/file/Magdir/claris +++ 
b/contrib/file/Magdir/claris @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: claris,v 1.5 2009/09/19 16:28:08 christos Exp $ # claris: file(1) magic for claris # "H. Nanosecond" # Claris Works a word processor, etc. diff --git a/contrib/file/Magdir/clipper b/contrib/file/Magdir/clipper index c325cb8f7c7d..9f4753405906 100644 --- a/contrib/file/Magdir/clipper +++ b/contrib/file/Magdir/clipper @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: clipper,v 1.6 2009/09/19 16:28:08 christos Exp $ # clipper: file(1) magic for Intergraph (formerly Fairchild) Clipper. # # XXX - what byte order does the Clipper use? diff --git a/contrib/file/Magdir/commands b/contrib/file/Magdir/commands index 0942802d9101..4fb151f32927 100644 --- a/contrib/file/Magdir/commands +++ b/contrib/file/Magdir/commands @@ -1,69 +1,68 @@ #------------------------------------------------------------------------------ +# $File: commands,v 1.42 2011/12/05 23:14:02 rrt Exp $ # commands: file(1) magic for various shells and interpreters # -#0 string : shell archive or script for antique kernel text -0 string/b #!\ /bin/sh POSIX shell script text executable +#0 string/w : shell archive or script for antique kernel text +0 string/wt #!\ /bin/sh POSIX shell script text executable !:mime text/x-shellscript -0 string/b #!\ /bin/csh C shell script text executable +0 string/wt #!\ /bin/csh C shell script text executable !:mime text/x-shellscript # korn shell magic, sent by George Wu, gwu@clyde.att.com -0 string/b #!\ /bin/ksh Korn shell script text executable +0 string/wt #!\ /bin/ksh Korn shell script text executable !:mime text/x-shellscript -0 string/b #!\ /bin/tcsh Tenex C shell script text executable +0 string/wt #!\ /bin/tcsh Tenex C shell script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/local/tcsh Tenex C shell script text executable +0 string/wt #!\ /usr/bin/tcsh Tenex C shell script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/local/bin/tcsh Tenex C shell script text executable +0 string/wt #!\ /usr/local/tcsh Tenex C shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/tcsh Tenex C shell script text executable !:mime text/x-shellscript # # zsh/ash/ae/nawk/gawk magic from cameron@cs.unsw.oz.au (Cameron Simpson) -0 string/b #!\ /bin/zsh Paul Falstad's zsh script text executable +0 string/wt #!\ /bin/zsh Paul Falstad's zsh script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/bin/zsh Paul Falstad's zsh script text executable +0 string/wt #!\ /usr/bin/zsh Paul Falstad's zsh script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable +0 string/wt #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/local/bin/ash Neil Brown's ash script text executable +0 string/wt #!\ /usr/local/bin/ash Neil Brown's ash script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/local/bin/ae Neil Brown's ae script text executable +0 string/wt #!\ /usr/local/bin/ae Neil Brown's ae script text executable !:mime text/x-shellscript -0 string/b #!\ /bin/nawk new awk script text executable +0 string/wt #!\ /bin/nawk new awk script text executable !:mime text/x-nawk -0 string/b #!\ /usr/bin/nawk new awk script text executable +0 string/wt #!\ /usr/bin/nawk new awk script text executable !:mime text/x-nawk -0 string/b #!\ 
/usr/local/bin/nawk new awk script text executable +0 string/wt #!\ /usr/local/bin/nawk new awk script text executable !:mime text/x-nawk -0 string/b #!\ /bin/gawk GNU awk script text executable +0 string/wt #!\ /bin/gawk GNU awk script text executable !:mime text/x-gawk -0 string/b #!\ /usr/bin/gawk GNU awk script text executable +0 string/wt #!\ /usr/bin/gawk GNU awk script text executable !:mime text/x-gawk -0 string/b #!\ /usr/local/bin/gawk GNU awk script text executable +0 string/wt #!\ /usr/local/bin/gawk GNU awk script text executable !:mime text/x-gawk # -0 string/b #!\ /bin/awk awk script text executable +0 string/wt #!\ /bin/awk awk script text executable !:mime text/x-awk -0 string/b #!\ /usr/bin/awk awk script text executable +0 string/wt #!\ /usr/bin/awk awk script text executable !:mime text/x-awk -# update to distinguish from *.vcf files -# this is broken because postscript has /EBEGIN{ for example. -#0 search/Bb BEGIN { awk script text +0 regex =^\\s*BEGIN\\s*[{] awk script text # AT&T Bell Labs' Plan 9 shell -0 string/b #!\ /bin/rc Plan 9 rc shell script text executable +0 string/wt #!\ /bin/rc Plan 9 rc shell script text executable # bash shell magic, from Peter Tobias (tobias@server.et-inf.fho-emden.de) -0 string/b #!\ /bin/bash Bourne-Again shell script text executable +0 string/wt #!\ /bin/bash Bourne-Again shell script text executable !:mime text/x-shellscript -0 string/b #!\ /usr/local/bin/bash Bourne-Again shell script text executable +0 string/wt #!\ /usr/bin/bash Bourne-Again shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bash Bourne-Again shell script text executable +!:mime text/x-shellscript +0 string/wt #!\ /usr/local/bin/bash Bourne-Again shell script text executable !:mime text/x-shellscript - -# using env -0 string #!/usr/bin/env a ->15 string >\0 %s script text executable -0 string #!\ /usr/bin/env a ->16 string >\0 %s script text executable # PHP scripts # Ulf Harnhammar @@ -73,11 +72,21 @@ !:mime text/x-php 0 search/1 = +0 string =24 regex [0-9.]+ \b, version %s !:mime text/x-php 0 string Zend\x00 PHP script Zend Optimizer data -0 string $! DCL command file +0 string/t $! DCL command file + +# Type: Pdmenu +# URL: http://packages.debian.org/pdmenu +# From: Edward Betts +0 string #!/usr/bin/pdmenu Pdmenu configuration file text diff --git a/contrib/file/Magdir/communications b/contrib/file/Magdir/communications index 81417ec08371..8e1d908b6759 100644 --- a/contrib/file/Magdir/communications +++ b/contrib/file/Magdir/communications @@ -1,5 +1,6 @@ #---------------------------------------------------------------------------- +# $File: communications,v 1.5 2009/09/19 16:28:08 christos Exp $ # communication # TTCN is the Tree and Tabular Combined Notation described in ISO 9646-3. diff --git a/contrib/file/Magdir/compress b/contrib/file/Magdir/compress index 5e18de0b8b71..94c209d76112 100644 --- a/contrib/file/Magdir/compress +++ b/contrib/file/Magdir/compress @@ -1,5 +1,5 @@ - #------------------------------------------------------------------------------ +# $File: compress,v 1.49 2011/12/07 22:04:27 christos Exp $ # compress: file(1) magic for pure-compression formats (no archives) # # compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc. @@ -19,7 +19,7 @@ # Edited by Chris Chittleborough , March 2002 # * Original filename is only at offset 10 if "extra field" absent # * Produce shorter output - notably, only report compression methods -# other than 8 ("deflate", the only method defined in RFC 1952). 
+# other than 8 ("deflate", the only method defined in RFC 1952). 0 string \037\213 gzip compressed data !:mime application/x-gzip >2 byte <8 \b, reserved method @@ -182,23 +182,30 @@ >4 belong 0x090A0C0D best compression # 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at) -# http://www.7-zip.org or DOC/7zFormat.txt +# http://www.7-zip.org or DOC/7zFormat.txt # 0 string 7z\274\257\047\034 7-zip archive data, >6 byte x version %d >7 byte x \b.%d +!:mime application/x-7z-compressed # Type: LZMA -# URL: http://www.7-zip.org/sdk.html -# From: Robert Millan and Reuben Thomas -# Commented out because apparently not reliable (according to Debian -# bug #364260) -#0 string ]\000\000\200\000 LZMA compressed data +0 lelong&0xffffff =0x5d +>12 leshort =0xff LZMA compressed data, +>>5 lequad =0xffffffffffffffff streamed +>>5 lequad !0xffffffffffffffff non-streamed, size %lld +!:mime application/x-lzma # http://tukaani.org/xz/xz-file-format.txt -0 ustring \xFD7zXZ\x00 xz compressed data +0 ustring \xFD7zXZ\x00 XZ compressed data !:mime application/x-xz +# https://github.com/ckolivas/lrzip/blob/master/doc/magic.header.txt +0 string LRZI LRZIP compressed data +>4 byte x - version %d +>5 byte x \b.%d +!:mime application/x-lrzip + # AFX compressed files (Wolfram Kleff) 2 string -afx- AFX compressed file data @@ -213,7 +220,12 @@ >5 byte x \b.%d >6 belong x (%d bytes) -# Type: XZ -# URL: http://tukaani.org/xz/ -0 string \xfd\x37\x7a\x58\x5a\x00 XZ compressed data -!:mime application/x-xz +0 string ArC\x01 FreeArc archive + +# Type: DACT compressed files +0 long 0x444354C3 DACT compressed data +>4 byte >-1 (version %i. +>5 byte >-1 %i. +>6 byte >-1 %i) +>7 long >0 , original size: %i bytes +>15 long >30 , block size: %i bytes diff --git a/contrib/file/Magdir/console b/contrib/file/Magdir/console index 2af6575499ac..573dd15fd582 100644 --- a/contrib/file/Magdir/console +++ b/contrib/file/Magdir/console @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: console,v 1.18 2010/09/20 19:19:17 rrt Exp $ # Console game magic # Toby Deshane # ines: file(1) magic for Marat's iNES Nintendo Entertainment System @@ -163,9 +165,13 @@ # Atari Lynx cartridge dump (EXE/BLL header) # From: "Stefan A. Haubenthal" -0 beshort 0x8008 Lynx cartridge, ->2 beshort x RAM start $%04x ->6 string BS93 +# Double-check that the image type matches too, 0x8008 conflicts with +# 8 character OMF-86 object file headers. +0 beshort 0x8008 +>6 string BS93 Lynx homebrew cartridge +>>2 beshort x \b, RAM start $%04x +>6 string LYNX Lynx cartridge +>>2 beshort x \b, RAM start $%04x # Opera file system that is used on the 3DO console # From: Serge van den Boom @@ -252,3 +258,7 @@ >>>(0x18.l-26) lelong x CRC32 0x%08x >>>(0x18.l-23) string x "%s" +# Type: scummVM savegame files +# From: Sven Hartge +0 string SCVM ScummVM savegame +>12 string >\0 "%s" diff --git a/contrib/file/Magdir/convex b/contrib/file/Magdir/convex index b1235d7e79a8..6141a82bba08 100644 --- a/contrib/file/Magdir/convex +++ b/contrib/file/Magdir/convex @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: convex,v 1.7 2009/09/19 16:28:08 christos Exp $ # convex: file(1) magic for Convex boxes # # Convexes are big-endian. 
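The reworked LZMA entry in the compress fragment above separates streamed data, whose 8-byte uncompressed-size field is all 0xff, from non-streamed data that records its size. Piped input forces the streamed case, which makes for a simple hedged test (assuming an xz with .lzma support and default settings; output is approximate):

  $ printf 'hello world\n' | xz --format=lzma > pipe.lzma
  $ file -m contrib/file/Magdir/compress pipe.lzma
  pipe.lzma: LZMA compressed data, streamed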
diff --git a/contrib/file/Magdir/cracklib b/contrib/file/Magdir/cracklib index a1a5a275532e..167659670d93 100644 --- a/contrib/file/Magdir/cracklib +++ b/contrib/file/Magdir/cracklib @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: cracklib,v 1.7 2009/09/19 16:28:08 christos Exp $ # cracklib: file (1) magic for cracklib v2.7 0 lelong 0x70775631 Cracklib password index, little endian diff --git a/contrib/file/Magdir/ctags b/contrib/file/Magdir/ctags index f326cf5cf96f..f480d32338bd 100644 --- a/contrib/file/Magdir/ctags +++ b/contrib/file/Magdir/ctags @@ -1,4 +1,6 @@ + # ---------------------------------------------------------------------------- +# $File: ctags,v 1.6 2009/09/19 16:28:08 christos Exp $ # ctags: file (1) magic for Exuberant Ctags files # From: Alexander Mai 0 search/1 =!_TAG Exuberant Ctags tag file text diff --git a/contrib/file/Magdir/cups b/contrib/file/Magdir/cups new file mode 100644 index 000000000000..da2cc4cce01e --- /dev/null +++ b/contrib/file/Magdir/cups @@ -0,0 +1,82 @@ + +#------------------------------------------------------------------------------ +# $File: cups,v 1.1 2011/11/10 18:59:54 christos Exp $ +# Cups: file(1) magic for the cups raster file format +# From: Laurent Martelli +# http://www.cups.org/documentation.php/spec-raster.html +# + +# Cups Raster image format, Big Endian +0 string RaS +!:mime application/vnd.cups-raster +>3 string t Cups Raster version 1, Big Endian +>3 string 2 Cups Raster version 2, Big Endian +>3 string 3 Cups Raster version 3, Big Endian +>280 belong x \b, %d +>284 belong x \bx%d dpi +>376 belong x \b, %dx +>380 belong x \b%d pixels +>388 belong x %d bits/color +>392 belong x %d bits/pixel +>400 belong 0 ColorOrder=Chunky +>400 belong 1 ColorOrder=Banded +>400 belong 2 ColorOrder=Planar +>404 belong 0 ColorSpace=gray +>404 belong 1 ColorSpace=RGB +>404 belong 2 ColorSpace=RGBA +>404 belong 3 ColorSpace=black +>404 belong 4 ColorSpace=CMY +>404 belong 5 ColorSpace=YMC +>404 belong 6 ColorSpace=CMYK +>404 belong 7 ColorSpace=YMCK +>404 belong 8 ColorSpace=KCMY +>404 belong 9 ColorSpace=KCMYcm +>404 belong 10 ColorSpace=GMCK +>404 belong 11 ColorSpace=GMCS +>404 belong 12 ColorSpace=WHITE +>404 belong 13 ColorSpace=GOLD +>404 belong 14 ColorSpace=SILVER +>404 belong 15 ColorSpace=CIE XYZ +>404 belong 16 ColorSpace=CIE Lab +>404 belong 17 ColorSpace=RGBW +>404 belong 18 ColorSpace=sGray +>404 belong 19 ColorSpace=sRGB +>404 belong 20 ColorSpace=AdobeRGB + + +# Cups Raster image format, Little Endian +1 string SaR +>0 string t Cups Raster version 1, Little Endian +>0 string 2 Cups Raster version 2, Little Endian +>0 string 3 Cups Raster version 3, Little Endian +!:mime application/vnd.cups-raster +>280 lelong x \b, %d +>284 lelong x \bx%d dpi +>376 lelong x \b, %dx +>380 lelong x \b%d pixels +>388 lelong x %d bits/color +>392 lelong x %d bits/pixel +>400 lelong 0 ColorOrder=Chunky +>400 lelong 1 ColorOrder=Banded +>400 lelong 2 ColorOrder=Planar +>404 lelong 0 ColorSpace=gray +>404 lelong 1 ColorSpace=RGB +>404 lelong 2 ColorSpace=RGBA +>404 lelong 3 ColorSpace=black +>404 lelong 4 ColorSpace=CMY +>404 lelong 5 ColorSpace=YMC +>404 lelong 6 ColorSpace=CMYK +>404 lelong 7 ColorSpace=YMCK +>404 lelong 8 ColorSpace=KCMY +>404 lelong 9 ColorSpace=KCMYcm +>404 lelong 10 ColorSpace=GMCK +>404 lelong 11 ColorSpace=GMCS +>404 lelong 12 ColorSpace=WHITE +>404 lelong 13 ColorSpace=GOLD +>404 lelong 14 ColorSpace=SILVER +>404 lelong 15 ColorSpace=CIE XYZ +>404 lelong 16 ColorSpace=CIE 
Lab +>404 lelong 17 ColorSpace=RGBW +>404 lelong 18 ColorSpace=sGray +>404 lelong 19 ColorSpace=sRGB +>404 lelong 20 ColorSpace=AdobeRGB diff --git a/contrib/file/Magdir/dact b/contrib/file/Magdir/dact index 5cca8d961601..04627c970389 100644 --- a/contrib/file/Magdir/dact +++ b/contrib/file/Magdir/dact @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: dact,v 1.4 2009/09/19 16:28:08 christos Exp $ # dact: file(1) magic for DACT compressed files # 0 long 0x444354C3 DACT compressed data diff --git a/contrib/file/Magdir/database b/contrib/file/Magdir/database index 0134355f3816..956a53ba4cf1 100644 --- a/contrib/file/Magdir/database +++ b/contrib/file/Magdir/database @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: database,v 1.28 2011/09/16 19:40:40 christos Exp $ # database: file(1) magic for various databases # # extracted from header/code files by Graeme Wilford (eep2gw@ee.surrey.ac.uk) @@ -267,3 +268,28 @@ >40 lequad x \b, bnum=%lld >48 lequad x \b, rnum=%lld >56 lequad x \b, fsiz=%lld + +# G-IR database made by gobject-introspect toolset, +# http://live.gnome.org/GObjectIntrospection +0 string GOBJ\nMETADATA\r\n\032 G-IR binary database +>16 byte x \b, v%d +>17 byte x \b.%d +>20 leshort x \b, %d entries +>22 leshort x \b/%d local + +# Type: QDBM Quick Database Manager +# From: Benoit Sibaud +0 string \\[depot\\]\n\f Quick Database Manager, little endian +0 string \\[DEPOT\\]\n\f Quick Database Manager, big endian + +# Type: TokyoCabinet database +# URL: http://tokyocabinet.sourceforge.net/ +# From: Benoit Sibaud +0 string ToKyO\ CaBiNeT\n TokyoCabinet database +>14 string x (version %s) + +# From: Stéphane Blondon http://www.yaal.fr +# Database file for Zope (done by FileStorage) +0 string FS21 Zope Object Database File Storage (data) +# Cache file for the database of Zope (done by ClientStorage) +0 string ZEC3 Zope Object Database Client Cache File (data) diff --git a/contrib/file/Magdir/diamond b/contrib/file/Magdir/diamond index 1abd01e0300f..39d1ed6258b2 100644 --- a/contrib/file/Magdir/diamond +++ b/contrib/file/Magdir/diamond @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: diamond,v 1.7 2009/09/19 16:28:08 christos Exp $ # diamond: file(1) magic for Diamond system # # ... diamond is a multi-media mail and electronic conferencing system.... 
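The new cups fragment above distinguishes byte orders solely by where the raster sync word lands: big-endian files start with "RaS" and carry the version character at offset 3, while little-endian files hold the byte-swapped word, putting the version character at offset 0 and "SaR" at offset 1. Four bytes are enough to trigger the top-level match; a sketch only, since real raster files are far larger and also fill in the resolution and color-space fields:

  $ printf 'RaS2' > header.ras
  $ file -m contrib/file/Magdir/cups header.ras
  header.ras: Cups Raster version 2, Big Endian

Writing the same four bytes reversed ('2SaR') selects the Little Endian branch instead.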
diff --git a/contrib/file/Magdir/diff b/contrib/file/Magdir/diff index 291bae67f55e..73f0135df86a 100644 --- a/contrib/file/Magdir/diff +++ b/contrib/file/Magdir/diff @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: diff,v 1.12 2010/12/07 16:52:52 christos Exp $ # diff: file(1) magic for diff(1) output # 0 search/1 diff\ diff output text @@ -12,3 +14,16 @@ 0 search/1 Index: RCS/CVS diff output text !:mime text/x-diff + +# bsdiff: file(1) magic for bsdiff(1) output +0 string/t BSDIFF40 bsdiff(1) patch file + + +# unified diff +0 search/4096 ---\ +>&0 search/1024 \n +>>&0 search/1 +++\ +>>>&0 search/1024 \n +>>>>&0 search/1 @@ unified diff output text +!:mime text/x-diff +!:strength + 90 diff --git a/contrib/file/Magdir/digital b/contrib/file/Magdir/digital index 615ef7a58cda..b1b77dd8c75f 100644 --- a/contrib/file/Magdir/digital +++ b/contrib/file/Magdir/digital @@ -1,23 +1,40 @@ + +#------------------------------------------------------------------------------ +# $File: digital,v 1.10 2011/05/03 01:44:17 christos Exp $ # Digital UNIX - Info # 0 string =!\n________64E Alpha archive >22 string X -- out of date # -# Alpha COFF Based Executables -# The stripped stuff really needs to be an 8 byte (64 bit) compare, -# but this works -0 leshort 0x183 COFF format alpha ->22 leshort&020000 &010000 sharable library, ->22 leshort&020000 ^010000 dynamically linked, ->24 leshort 0410 pure ->24 leshort 0413 demand paged ->8 lelong >0 executable or object module, not stripped ->8 lelong 0 ->>12 lelong 0 executable or object module, stripped ->>12 lelong >0 executable or object module, not stripped ->27 byte >0 - version %d. ->26 byte >0 %d- ->28 leshort >0 %d + +0 leshort 0603 +>>24 leshort 0410 COFF format alpha pure +>>24 leshort 0413 COFF format alpha demand paged +>>>22 leshort&030000 !020000 executable +>>>22 leshort&020000 !0 dynamically linked +>>>16 lelong !0 not stripped +>>>16 lelong 0 stripped +>>>27 byte x - version %d +>>>26 byte x \b.%d +>>>28 byte x \b-%d +>>24 leshort 0407 COFF format alpha object +>>>22 leshort&030000 020000 shared library +>>>27 byte x - version %d +>>>26 byte x \b.%d +>>>28 byte x \b-%d + +# Basic recognition of Digital UNIX core dumps - Mike Bremford +# +# The actual magic number is just "Core", followed by a 2-byte version +# number; however, treating any file that begins with "Core" as a Digital +# UNIX core dump file may produce too many false hits, so we include one +# byte of the version number as well; DU 5.0 appears only to be up to +# version 2. +# +0 string Core\001 Alpha COFF format core dump (Digital UNIX) +>24 string >\0 \b, from '%s' +0 string Core\002 Alpha COFF format core dump (Digital UNIX) +>24 string >\0 \b, from '%s' # # The next is incomplete, we could tell more about this format, # but its not worth it. diff --git a/contrib/file/Magdir/dolby b/contrib/file/Magdir/dolby index 230f738a209d..fee287ccf13b 100644 --- a/contrib/file/Magdir/dolby +++ b/contrib/file/Magdir/dolby @@ -1,3 +1,6 @@ + +#------------------------------------------------------------------------------ +# $File: dolby,v 1.5 2009/09/19 16:28:08 christos Exp $ # ATSC A/53 aka AC-3 aka Dolby Digital # from http://www.atsc.org/standards/a_52a.pdf # corrections, additions, etc. are always welcome! 
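
For reference only (not part of the patch): the new "unified diff" rule added to Magdir/diff above chains relative searches -- "--- " anywhere in the first 4096 bytes, the end of that line, then "+++ " immediately on the following line, then a line beginning with "@@". A minimal Python sketch of that chain, assuming my reading of the &0 continuation offsets; the function name is made up:

    #!/usr/bin/env python3
    """Rough sketch of the nested-search logic behind the new unified-diff
    magic: each step searches relative to the end of the previous match."""

    def looks_like_unified_diff(data: bytes) -> bool:
        # 0 search/4096 ---\      -> "--- " within the first 4096 bytes
        start = data[:4096].find(b"--- ")
        if start < 0:
            return False
        pos = start + 4
        # >&0 search/1024 \n      -> end of that line within 1024 bytes
        nl = data.find(b"\n", pos, pos + 1024)
        if nl < 0:
            return False
        pos = nl + 1
        # >>&0 search/1 +++\      -> "+++ " must follow immediately
        if not data.startswith(b"+++ ", pos):
            return False
        pos += 4
        # >>>&0 search/1024 \n
        nl = data.find(b"\n", pos, pos + 1024)
        if nl < 0:
            return False
        # >>>>&0 search/1 @@      -> hunk header starts the next line
        return data.startswith(b"@@", nl + 1)

    if __name__ == "__main__":
        import sys
        with open(sys.argv[1], "rb") as f:
            print(looks_like_unified_diff(f.read(8192)))

The accompanying "!:strength + 90" line raises the rule's priority so it outranks the plainer "diff output text" entries earlier in the same file.
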
diff --git a/contrib/file/Magdir/dump b/contrib/file/Magdir/dump index cef191a2de48..1ac91e22010c 100644 --- a/contrib/file/Magdir/dump +++ b/contrib/file/Magdir/dump @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: dump,v 1.11 2009/09/19 16:28:09 christos Exp $ # dump: file(1) magic for dump file format--for new and old dump filesystems # # We specify both byte orders in order to recognize byte-swapped dumps. diff --git a/contrib/file/Magdir/dyadic b/contrib/file/Magdir/dyadic index e8a9d25d8c0f..c1a2c3c53e39 100644 --- a/contrib/file/Magdir/dyadic +++ b/contrib/file/Magdir/dyadic @@ -1,8 +1,9 @@ #------------------------------------------------------------------------------ +# $File: dyadic,v 1.5 2010/09/20 18:55:20 rrt Exp $ # Dyadic: file(1) magic for Dyalog APL. # -0 byte 0xaa +0 byte 0xaa >1 byte <4 Dyalog APL >>1 byte 0x00 incomplete workspace >>1 byte 0x01 component file @@ -10,3 +11,36 @@ >>1 byte 0x03 workspace >>2 byte x version %d >>3 byte x .%d + +0 beshort 0xaa03 Dyalog APL +>2 byte x workspace type %d +>3 byte x subtype %d +>7 byte&0x28 0x00 32-bit +>7 byte&0x28 0x20 64-bit +>7 byte&0x0c 0x00 classic +>7 byte&0x0c 0x04 unicode +>7 byte&0x88 0x00 big-endian +>7 byte&0x88 0x80 little-endian + +0 byte 0xaa Dyalog APL +>1 byte 0x00 aplcore +>1 byte 0x01 component file 32-bit non-journaled non-checksummed +>1 byte 0x02 external variable exclusive +>1 byte 0x06 external variable shared +>1 byte 0x07 session +>1 byte 0x08 mapped file 32-bit +>1 byte 0x09 component file 64-bit non-journaled non-checksummed +>1 byte 0x0a mapped file 64-bit +>1 byte 0x0b component file 32-bit level 1 journaled non-checksummed +>1 byte 0x0c component file 64-bit level 1 journaled non-checksummed +>1 byte 0x0d component file 32-bit level 1 journaled checksummed +>1 byte 0x0e component file 64-bit level 1 journaled checksummed +>1 byte 0x0f component file 32-bit level 2 journaled checksummed +>1 byte 0x10 component file 64-bit level 2 journaled checksummed +>1 byte 0x11 component file 32-bit level 3 journaled checksummed +>1 byte 0x12 component file 64-bit level 3 journaled checksummed +>1 byte 0x13 component file 32-bit non-journaled checksummed +>1 byte 0x14 component file 64-bit non-journaled checksummed +>1 byte 0x80 DDB + +0 short 0x6060 Dyalog APL transfer diff --git a/contrib/file/Magdir/ebml b/contrib/file/Magdir/ebml new file mode 100644 index 000000000000..d5d174329a52 --- /dev/null +++ b/contrib/file/Magdir/ebml @@ -0,0 +1,8 @@ + +#------------------------------------------------------------------------------ +# $File: ebml,v 1.1 2010/07/02 00:07:03 christos Exp $ +# ebml: file(1) magic for various Extensible Binary Meta Language +# http://www.matroska.org/technical/specs/index.html#track +0 belong 0x1a45dfa3 EBML file +>4 search/b/100 \102\202 +>>&1 string x \b, creator %.8s diff --git a/contrib/file/Magdir/editors b/contrib/file/Magdir/editors index 0b15bf820ab6..4c5b8a669957 100644 --- a/contrib/file/Magdir/editors +++ b/contrib/file/Magdir/editors @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: editors,v 1.8 2009/09/19 16:28:09 christos Exp $ # T602 editor documents # by David Necas 0 string @CT\ T602 document data, diff --git a/contrib/file/Magdir/efi b/contrib/file/Magdir/efi index 4eec2eb93779..7335c5cefd51 100644 --- a/contrib/file/Magdir/efi +++ b/contrib/file/Magdir/efi @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# 
$File: efi,v 1.4 2009/09/19 16:28:09 christos Exp $ # efi: file(1) magic for Universal EFI binaries 0 lelong 0x0ef1fab9 diff --git a/contrib/file/Magdir/elf b/contrib/file/Magdir/elf index c605495429a8..8e3b7bc74191 100644 --- a/contrib/file/Magdir/elf +++ b/contrib/file/Magdir/elf @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: elf,v 1.54 2011/12/17 17:16:29 christos Exp $ # elf: file(1) magic for ELF executables # # We have to check the byte order flag to see what byte order all the @@ -148,6 +149,9 @@ >>18 leshort 106 Analog Devices Blackfin, >>18 leshort 113 Altera Nios II, >>18 leshort 0xae META, +>>18 leshort 187 Tilera TILE64, +>>18 leshort 188 Tilera TILEPro, +>>18 leshort 191 Tilera TILE-Gx, >>18 leshort 0x3426 OpenRISC (obsolete), >>18 leshort 0x8472 OpenRISC (obsolete), >>18 leshort 0x9026 Alpha (unofficial), @@ -258,6 +262,9 @@ >>18 leshort 0x8472 OpenRISC (obsolete), >>18 beshort 94 Tensilica Xtensa, >>18 beshort 97 NatSemi 32k, +>>18 beshort 187 Tilera TILE64, +>>18 beshort 188 Tilera TILEPro, +>>18 beshort 191 Tilera TILE-Gx, >>18 beshort 0x18ad AVR32 (unofficial), >>18 beshort 0x9026 Alpha (unofficial), >>18 beshort 0xa390 IBM S/390 (obsolete), diff --git a/contrib/file/Magdir/encore b/contrib/file/Magdir/encore index 63cb5d4f9f42..ef82eed64b0c 100644 --- a/contrib/file/Magdir/encore +++ b/contrib/file/Magdir/encore @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: encore,v 1.6 2009/09/19 16:28:09 christos Exp $ # encore: file(1) magic for Encore machines # # XXX - needs to have the byte order specified (NS32K was little-endian, diff --git a/contrib/file/Magdir/epoc b/contrib/file/Magdir/epoc index 80229c47f38f..c67a8b66e7b6 100644 --- a/contrib/file/Magdir/epoc +++ b/contrib/file/Magdir/epoc @@ -1,11 +1,53 @@ + #------------------------------------------------------------------------------ +# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $ # EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1] -# Stefan Praszalowicz (hpicollo@worldnet.fr) +# Stefan Praszalowicz and Peter Breitenlohner # Useful information for improving this file can be found at: # http://software.frodo.looijaard.name/psiconv/formats/Index.html -0 lelong 0x10000037 +#------------------------------------------------------------------------------ +0 lelong 0x10000037 Psion Series 5 +>4 lelong 0x10000039 font file +>4 lelong 0x1000003A printer driver +>4 lelong 0x1000003B clipboard +>4 lelong 0x10000042 multi-bitmap image +!:mime image/x-epoc-mbm +>4 lelong 0x1000006A application information file >4 lelong 0x1000006D ->>8 lelong 0x1000007F Psion Word ->>8 lelong 0x10000088 Psion Sheet ->>8 lelong 0x1000007D Psion Sketch ->>8 lelong 0x10000085 Psion TextEd +>>8 lelong 0x1000007D Sketch image +!:mime image/x-epoc-sketch +>>8 lelong 0x1000007E voice note +>>8 lelong 0x1000007F Word file +!:mime application/x-epoc-word +>>8 lelong 0x10000085 OPL program (TextEd) +!:mime application/x-epoc-opl +>>8 lelong 0x10000088 Sheet file +!:mime application/x-epoc-sheet +>>8 lelong 0x100001C4 EasyFax initialisation file +>4 lelong 0x10000073 OPO module +!:mime application/x-epoc-opo +>4 lelong 0x10000074 OPL application +!:mime application/x-epoc-app +>4 lelong 0x1000008A exported multi-bitmap image + +0 lelong 0x10000041 Psion Series 5 ROM multi-bitmap image + +0 lelong 0x10000050 Psion Series 5 +>4 lelong 0x1000006D database +>4 lelong 0x100000E4 ini file + +0 lelong 0x10000079 Psion 
Series 5 binary: +>4 lelong 0x00000000 DLL +>4 lelong 0x10000049 comms hardware library +>4 lelong 0x1000004A comms protocol library +>4 lelong 0x1000005D OPX +>4 lelong 0x1000006C application +>4 lelong 0x1000008D DLL +>4 lelong 0x100000AC logical device driver +>4 lelong 0x100000AD physical device driver +>4 lelong 0x100000E5 file transfer protocol +>4 lelong 0x100000E5 file transfer protocol +>4 lelong 0x10000140 printer definition +>4 lelong 0x10000141 printer definition + +0 lelong 0x1000007A Psion Series 5 executable diff --git a/contrib/file/Magdir/erlang b/contrib/file/Magdir/erlang index 6b2c4dcc3c66..b604a06828f1 100644 --- a/contrib/file/Magdir/erlang +++ b/contrib/file/Magdir/erlang @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: erlang,v 1.6 2010/09/20 19:19:17 rrt Exp $ # erlang: file(1) magic for Erlang JAM and BEAM files # URL: http://www.erlang.org/faq/x779.html#AEN812 @@ -16,3 +17,5 @@ 79 string Tue\ Jan\ 22\ 14:32:44\ MET\ 1991 Erlang JAM file - version 4.2 4 string 1.0\ Fri\ Feb\ 3\ 09:55:56\ MET\ 1995 Erlang JAM file - version 4.3 + +0 bequad 0x0000000000ABCDEF Erlang DETS file diff --git a/contrib/file/Magdir/esri b/contrib/file/Magdir/esri index 35c34923e056..e070e4f83f45 100644 --- a/contrib/file/Magdir/esri +++ b/contrib/file/Magdir/esri @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: esri,v 1.4 2009/09/19 16:28:09 christos Exp $ # ESRI Shapefile format (.shp .shx .dbf=DBaseIII) # Based on info from # diff --git a/contrib/file/Magdir/fcs b/contrib/file/Magdir/fcs index ac4b02c81cf8..613437f842c3 100644 --- a/contrib/file/Magdir/fcs +++ b/contrib/file/Magdir/fcs @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: fcs,v 1.4 2009/09/19 16:28:09 christos Exp $ # fcs: file(1) magic for FCS (Flow Cytometry Standard) data files # From Roger Leigh 0 string FCS1.0 Flow Cytometry Standard (FCS) data, version 1.0 diff --git a/contrib/file/Magdir/filesystems b/contrib/file/Magdir/filesystems index 93da429432aa..af9695b9a42a 100644 --- a/contrib/file/Magdir/filesystems +++ b/contrib/file/Magdir/filesystems @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: filesystems,v 1.61 2011/01/10 14:01:10 christos Exp $ # filesystems: file(1) magic for different filesystems # 0 string \366\366\366\366 PC formatted floppy with no filesystem @@ -202,7 +203,7 @@ # http://www.bcdwb.de/bcdw/index_e.htm >3 string BCDL >>498 string BCDL\ \ \ \ BIN \b, Bootable CD Loader (1.50Z) -# mbr partion table entries +# mbr partition table entries # OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs >3 string !MS >>3 string !SYSLINUX @@ -256,7 +257,7 @@ #>>>>>>>>>>>>496 ubeshort&1023 x \b, startcylinder? 
%d >>>>>>>>>>>>502 ulelong x \b, startsector %u >>>>>>>>>>>>506 ulelong x \b, %u sectors -# mbr partion table entries end +# mbr partition table entries end # http://www.acronis.de/ #FAT label=ACRONIS\ SZ #OEM-ID=BOOTWIZ0 @@ -882,13 +883,41 @@ # Minix filesystems - Juan Cespedes -0x410 leshort 0x137f Minix filesystem -0x410 beshort 0x137f Minix filesystem (big endian) ->0x402 beshort !0 \b, %d zones +0x410 leshort 0x137f +!:strength / 2 +>0x402 beshort < 100 Minix filesystem, V1, %d zones +>0x1e string minix \b, bootable +0x410 beshort 0x137f +!:strength / 2 +>0x402 beshort < 100 Minix filesystem, V1 (big endian), %d zones +>0x1e string minix \b, bootable +0x410 leshort 0x138f +!:strength / 2 +>0x402 beshort < 100 Minix filesystem, V1, 30 char names, %d zones +>0x1e string minix \b, bootable +0x410 beshort 0x138f +!:strength / 2 +>0x402 beshort < 100 Minix filesystem, V1, 30 char names (big endian), %d zones +>0x1e string minix \b, bootable +0x410 leshort 0x2468 +>0x402 beshort < 100 Minix filesystem, V2, %d zones +>0x1e string minix \b, bootable +0x410 beshort 0x2468 +>0x402 beshort < 100 Minix filesystem, V2 (big endian), %d zones +>0x1e string minix \b, bootable + +0x410 leshort 0x2478 +>0x402 beshort < 100 Minix filesystem, V2, 30 char names, %d zones +>0x1e string minix \b, bootable +0x410 leshort 0x2478 +>0x402 beshort < 100 Minix filesystem, V2, 30 char names, %d zones +>0x1e string minix \b, bootable +0x410 beshort 0x2478 +>0x402 beshort !0 Minix filesystem, V2, 30 char names (big endian), %d zones +>0x1e string minix \b, bootable +0x410 leshort 0x4d5a +>0x402 beshort !0 Minix filesystem, V3, %d zones >0x1e string minix \b, bootable -0x410 leshort 0x138f Minix filesystem, 30 char names -0x410 leshort 0x2468 Minix filesystem, version 2 -0x410 leshort 0x2478 Minix filesystem, version 2, 30 char names # romfs filesystems - Juan Cespedes 0 string -rom1fs- romfs filesystem, version 1 @@ -1084,6 +1113,8 @@ # ext2/ext3 filesystems - Andreas Dilger # ext4 filesystem - Eric Sandeen +# volume label and UUID Russell Coker +# http://etbe.coker.com.au/2008/07/08/label-vs-uuid-vs-device/ 0x438 leshort 0xEF53 Linux >0x44c lelong x rev %d >0x43e leshort x \b.%d @@ -1099,25 +1130,32 @@ # else large RO_COMPAT? >>>0x464 lelong >0x0000007 ext4 filesystem data # else large INCOMPAT? 
->>0x460 lelong >0x000003f ext4 filesystem data +>>0x460 lelong >0x000003f ext4 filesystem data +>0x468 belong x \b, UUID=%08x +>0x46c beshort x \b-%04x +>0x46e beshort x \b-%04x +>0x470 beshort x \b-%04x +>0x472 belong x \b-%08x +>0x476 beshort x \b%04x +>0x478 string >0 \b, volume name "%s" # General flags for any ext* fs ->0x460 lelong &0x0000004 (needs journal recovery) ->0x43a leshort &0x0000002 (errors) +>0x460 lelong &0x0000004 (needs journal recovery) +>0x43a leshort &0x0000002 (errors) # INCOMPAT flags ->0x460 lelong &0x0000001 (compressed) -#>0x460 lelong &0x0000002 (filetype) -#>0x460 lelong &0x0000010 (meta bg) ->0x460 lelong &0x0000040 (extents) ->0x460 lelong &0x0000080 (64bit) -#>0x460 lelong &0x0000100 (mmp) -#>0x460 lelong &0x0000200 (flex bg) +>0x460 lelong &0x0000001 (compressed) +#>0x460 lelong &0x0000002 (filetype) +#>0x460 lelong &0x0000010 (meta bg) +>0x460 lelong &0x0000040 (extents) +>0x460 lelong &0x0000080 (64bit) +#>0x460 lelong &0x0000100 (mmp) +#>0x460 lelong &0x0000200 (flex bg) # RO_INCOMPAT flags -#>0x464 lelong &0x0000001 (sparse super) ->0x464 lelong &0x0000002 (large files) ->0x464 lelong &0x0000008 (huge files) -#>0x464 lelong &0x0000010 (gdt checksum) -#>0x464 lelong &0x0000020 (many subdirs) -#>0x463 lelong &0x0000040 (extra isize) +#>0x464 lelong &0x0000001 (sparse super) +>0x464 lelong &0x0000002 (large files) +>0x464 lelong &0x0000008 (huge files) +#>0x464 lelong &0x0000010 (gdt checksum) +#>0x464 lelong &0x0000020 (many subdirs) +#>0x463 lelong &0x0000040 (extra isize) # SGI disk labels - Nathan Scott 0 belong 0x0BE5A941 SGI disk label (volume header) @@ -1195,7 +1233,7 @@ # CDROM Filesystems # Modified for UDF by gerardo.cacciari@gmail.com -32769 string CD001 +32769 string CD001 # !:mime application/x-iso9660-image >38913 string !NSR0 ISO 9660 CD-ROM filesystem data >38913 string NSR0 UDF filesystem data @@ -1211,6 +1249,9 @@ !:mime application/x-iso9660-image 32776 string CDROM High Sierra CD-ROM filesystem data +# .cso files +0 string CISO Compressed ISO CD image + # cramfs filesystem - russell@coker.com.au 0 lelong 0x28cd3d45 Linux Compressed ROM File System data, little endian >4 lelong x size %lu @@ -1235,6 +1276,7 @@ # reiserfs - russell@coker.com.au 0x10034 string ReIsErFs ReiserFS V3.5 0x10034 string ReIsEr2Fs ReiserFS V3.6 +0x10034 string ReIsEr3Fs ReiserFS V3.6.19 >0x1002c leshort x block size %d >0x10032 leshort &2 (mounted or unclean) >0x10000 lelong x num blocks %d @@ -1255,11 +1297,70 @@ 0 string VoIP\ Startup\ and Aculab VoIP firmware >35 string x format %s -# u-boot/PPCBoot image file -# From: Mark Brown -0 belong 0x27051956 u-boot/PPCBoot image ->4 string PPCBoot ->>12 string x version %s +# From: Mark Brown [old] +# From: Behan Webster +0 belong 0x27051956 u-boot legacy uImage, +>32 string x %s, +>28 byte 0 Invalid os/ +>28 byte 1 OpenBSD/ +>28 byte 2 NetBSD/ +>28 byte 3 FreeBSD/ +>28 byte 4 4.4BSD/ +>28 byte 5 Linux/ +>28 byte 6 SVR4/ +>28 byte 7 Esix/ +>28 byte 8 Solaris/ +>28 byte 9 Irix/ +>28 byte 10 SCO/ +>28 byte 11 Dell/ +>28 byte 12 NCR/ +>28 byte 13 LynxOS/ +>28 byte 14 VxWorks/ +>28 byte 15 pSOS/ +>28 byte 16 QNX/ +>28 byte 17 Firmware/ +>28 byte 18 RTEMS/ +>28 byte 19 ARTOS/ +>28 byte 20 Unity OS/ +>28 byte 21 INTEGRITY/ +>29 byte 0 \bInvalid CPU, +>29 byte 1 \bAlpha, +>29 byte 2 \bARM, +>29 byte 3 \bIntel x86, +>29 byte 4 \bIA64, +>29 byte 5 \bMIPS, +>29 byte 6 \bMIPS 64-bit, +>29 byte 7 \bPowerPC, +>29 byte 8 \bIBM S390, +>29 byte 9 \bSuperH, +>29 byte 10 \bSparc, +>29 byte 11 \bSparc 64-bit, +>29 byte 12 \bM68K, 
+>29 byte 13 \bNios-32, +>29 byte 14 \bMicroBlaze, +>29 byte 15 \bNios-II, +>29 byte 16 \bBlackfin, +>29 byte 17 \bAVR32, +>29 byte 18 \bSTMicroelectronics ST200, +>30 byte 0 Invalid Image +>30 byte 1 Standalone Program +>30 byte 2 OS Kernel Image +>30 byte 3 RAMDisk Image +>30 byte 4 Multi-File Image +>30 byte 5 Firmware Image +>30 byte 6 Script File +>30 byte 7 Filesystem Image (any type) +>30 byte 8 Binary Flat Device Tree BLOB +>31 byte 0 (Not compressed), +>31 byte 1 (gzip), +>31 byte 2 (bzip2), +>31 byte 3 (lzma), +>12 belong x %d bytes, +>8 bedate x %s, +>16 belong x Load Address: 0x%08X, +>20 belong x Entry Point: 0x%08X, +>4 belong x Header CRC: 0x%08X, +>24 belong x Data CRC: 0x%08X # JFFS2 file system 0 leshort 0x1984 Linux old jffs2 filesystem data little endian @@ -1272,28 +1373,46 @@ >28 beshort <3 >>8 belong x %d bytes, >28 beshort >2 ->>63 bequad x %lld bytes, +>>28 beshort <4 +>>>63 bequad x %lld bytes, +>>28 beshort >3 +>>>40 bequad x %lld bytes, #>>67 belong x %d bytes, >4 belong x %d inodes, >28 beshort <2 >>32 beshort x blocksize: %d bytes, >28 beshort >1 ->>51 belong x blocksize: %d bytes, ->39 bedate x created: %s +>>28 beshort <4 +>>>51 belong x blocksize: %d bytes, +>>28 beshort >3 +>>>12 belong x blocksize: %d bytes, +>28 beshort <4 +>>39 bedate x created: %s +>28 beshort >3 +>>8 bedate x created: %s 0 string hsqs Squashfs filesystem, little endian, >28 leshort x version %d. >30 leshort x \b%d, >28 leshort <3 >>8 lelong x %d bytes, >28 leshort >2 ->>63 lequad x %lld bytes, +>>28 leshort <4 +>>>63 lequad x %lld bytes, +>>28 leshort >3 +>>>40 lequad x %lld bytes, #>>63 lelong x %d bytes, >4 lelong x %d inodes, >28 leshort <2 >>32 leshort x blocksize: %d bytes, >28 leshort >1 ->>51 lelong x blocksize: %d bytes, ->39 ledate x created: %s +>>28 leshort <4 +>>>51 lelong x blocksize: %d bytes, +>>28 leshort >3 +>>>12 lelong x blocksize: %d bytes, +>28 leshort <4 +>>39 ledate x created: %s +>28 leshort >3 +>>8 ledate x created: %s 0 string td\000 floppy image data (TeleDisk) @@ -1312,6 +1431,10 @@ >>>>>>>>&0 bedate !0 incremental since: %s #---------------------------------------------------------- +#delta ISO Daniel Novotny (dnovotny@redhat.com) +0 string DISO Delta ISO data +>4 belong x version %d + # VMS backup savesets - gerardo.cacciari@gmail.com # 4 string \x01\x00\x01\x00\x01\x00 @@ -1356,13 +1479,17 @@ 0 string CPQRFBLO Compaq/HP RILOE floppy image #------------------------------------------------------------------------------ -# Files-11 On-Disk Structure (OpenVMS file system) - gerardo.cacciari@gmail.com -# These bits come from LBN 1 (home block) of ODS-2 and ODS-5 volumes, which is -# mapped to VBN 2 of [000000]INDEXF.SYS;1 +# Files-11 On-Disk Structure (File system for various RSX-11 and VMS flavours). 
+# These bits come from LBN 1 (home block) of ODS-1, ODS-2 and ODS-5 volumes, +# which is mapped to VBN 2 of [000000]INDEXF.SYS;1 - gerardo.cacciari@gmail.com # -1008 string DECFILE11B Files-11 On-Disk Structure +1008 string DECFILE11 Files-11 On-Disk Structure >525 byte x Level %d ->525 byte x (ODS-%d OpenVMS file system), +>525 byte x (ODS-%d); +>1017 string A RSX-11, VAX/VMS or OpenVMS VAX file system; +>1017 string B +>>525 byte 2 VAX/VMS or OpenVMS file system; +>>525 byte 5 OpenVMS Alpha or Itanium file system; >984 string x volume label is '%-12.12s' # From: Thomas Klausner @@ -1377,9 +1504,13 @@ # From Eric Sandeen # GFS2 -0x10000 belong 0x01161970 GFS2 Filesystem ->0x10024 belong x (blocksize %d, ->0x10060 string >\0 lockproto %s) +0x10000 belong 0x01161970 +>0x10018 belong 0x0000051d GFS1 Filesystem +>>0x10024 belong x (blocksize %d, +>>0x10060 string >\0 lockproto %s) +>0x10018 belong 0x00000709 GFS2 Filesystem +>>0x10024 belong x (blocksize %d, +>>0x10060 string >\0 lockproto %s) # BTRFS 0x10040 string _BHRfS_M BTRFS Filesystem @@ -1392,3 +1523,49 @@ # dvdisaster's .ecc # From: "Nelson A. de Oliveira" 0 string *dvdisaster* dvdisaster error correction file + +# xfs metadump image +# mb_magic XFSM at 0; superblock magic XFSB at 1 << mb_blocklog +# but can we do the << ? For now it's always 512 (0x200) anyway. +0 string XFSM +>0x200 string XFSB XFS filesystem metadump image + +# Type: CROM filesystem +# From: Werner Fink +0 string CROMFS CROMFS +>6 string >\0 \b version %2.2s, +>8 ulequad >0 \b block data at %lld, +>16 ulequad >0 \b fblock table at %lld, +>24 ulequad >0 \b inode table at %lld, +>32 ulequad >0 \b root at %lld, +>40 ulelong >0 \b fblock size = %ld, +>44 ulelong >0 \b block size = %ld, +>48 ulequad >0 \b bytes = %lld + +# Type: xfs metadump image +# From: Daniel Novotny +# mb_magic XFSM at 0; superblock magic XFSB at 1 << mb_blocklog +# but can we do the << ? For now it's always 512 (0x200) anyway. +0 string XFSM +>0x200 string XFSB XFS filesystem metadump image + +# Type: delta ISO +# From: Daniel Novotny +0 string DISO Delta ISO data, +>4 belong x version %d + +# JFS2 (Journaling File System) image. (Old JFS1 has superblock at 0x1000.) +# See linux/fs/jfs/jfs_superblock.h for layout; see jfs_filsys.h for flags. +# From: Adam Buchbinder +0x8000 string JFS1 +# Because it's text-only magic, check a binary value (version) to be sure. +# Should always be 2, but mkfs.jfs writes it as 1. Needs to be 2 or 1 to be +# mountable. +>&0 lelong <3 JFS2 filesystem image +# Label is followed by a UUID; we have to limit string length to avoid +# appending the UUID in the case of a 16-byte label. 
+>>&144 regex [\x20-\x7E]{1,16} (label "%s") +>>&0 lequad x \b, %lld blocks +>>&8 lelong x \b, blocksize %d +>>&32 lelong&0x00000006 >0 (dirty) +>>&36 lelong >0 (compressed) diff --git a/contrib/file/Magdir/flash b/contrib/file/Magdir/flash index 0b985f22b7c8..dea35aed0bde 100644 --- a/contrib/file/Magdir/flash +++ b/contrib/file/Magdir/flash @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: flash,v 1.9 2009/11/08 01:30:01 christos Exp $ # flash: file(1) magic for Macromedia Flash file format # # See @@ -17,5 +18,8 @@ !:mime video/x-flv # +# Yosu Gomez +0 string AGD2\xbe\xb8\xbb\xcd\x00 Macromedia Freehand 7 Document +0 string AGD3\xbe\xb8\xbb\xcc\x00 Macromedia Freehand 8 Document # From Dave Wilson 0 string AGD4\xbe\xb8\xbb\xcb\x00 Macromedia Freehand 9 Document diff --git a/contrib/file/Magdir/fonts b/contrib/file/Magdir/fonts index 31ff8a2c8b1e..917d372e6ef6 100644 --- a/contrib/file/Magdir/fonts +++ b/contrib/file/Magdir/fonts @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: fonts,v 1.23 2010/09/20 18:55:20 rrt Exp $ # fonts: file(1) magic for font data # 0 search/1 FONT ASCII vfont text @@ -9,6 +11,9 @@ 0 string %!PS-AdobeFont-1. PostScript Type 1 font text >20 string >\0 (%s) 6 string %!PS-AdobeFont-1. PostScript Type 1 font program data +0 string %!FontType1 PostScript Type 1 font program data +6 string %!FontType1 PostScript Type 1 font program data +0 string %!PS-Adobe-3.0\ Resource-Font PostScript Type 1 font text # X11 font files in SNF (Server Natural Format) format 0 belong 00000004 X11 SNF font data, MSB first @@ -51,15 +56,30 @@ # True Type fonts 0 string \000\001\000\000\000 TrueType font data +!:mime application/x-font-ttf 0 string \007\001\001\000Copyright\ (c)\ 199 Adobe Multiple Master font 0 string \012\001\001\000Copyright\ (c)\ 199 Adobe Multiple Master font +# TrueType/OpenType font collections (.ttc) +# http://www.microsoft.com/typography/otspec/otff.htm 0 string ttcf TrueType font collection data +>4 belong 0x00010000 \b, 1.0 +>>8 belong >0 \b, %d fonts +>4 belong 0x00020000 \b, 2.0 +>>8 belong >0 \b, %d fonts +# 0x44454947 = 'DSIG' +>>>16 belong 0x44534947 \b, digitally signed # Opentype font data from Avi Bercovich -0 string OTTO OpenType font data +0 string OTTO OpenType font data +!:mime application/vnd.ms-opentype # Gürkan Sengün , www.linuks.mine.nu 0 string SplineFontDB: Spline Font Database +!:mime application/vnd.font-fontforge-sfd >14 string x version %s + +# EOT +34 string LP Embedded OpenType (EOT) +!:mime application/vnd.ms-fontobject diff --git a/contrib/file/Magdir/fortran b/contrib/file/Magdir/fortran index f42c7c8a8d52..e2ef0cdb754c 100644 --- a/contrib/file/Magdir/fortran +++ b/contrib/file/Magdir/fortran @@ -1,3 +1,6 @@ + +#------------------------------------------------------------------------------ +# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $ # FORTRAN source 0 regex/100 \^[Cc][\ \t] FORTRAN program !:mime text/x-fortran diff --git a/contrib/file/Magdir/frame b/contrib/file/Magdir/frame index 3699b44223fb..babe89027156 100644 --- a/contrib/file/Magdir/frame +++ b/contrib/file/Magdir/frame @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: frame,v 1.12 2009/09/19 16:28:09 christos Exp $ # frame: file(1) magic for FrameMaker files # # This stuff came on a FrameMaker demo tape, most of which is diff --git a/contrib/file/Magdir/freebsd 
b/contrib/file/Magdir/freebsd index be304177702a..a01ac4a28575 100644 --- a/contrib/file/Magdir/freebsd +++ b/contrib/file/Magdir/freebsd @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: freebsd,v 1.7 2009/09/19 16:28:09 christos Exp $ # freebsd: file(1) magic for FreeBSD objects # # All new-style FreeBSD magic numbers are in host byte order (i.e., diff --git a/contrib/file/Magdir/fsav b/contrib/file/Magdir/fsav index ccc6d596c21a..0a7a7f802056 100644 --- a/contrib/file/Magdir/fsav +++ b/contrib/file/Magdir/fsav @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: fsav,v 1.11 2009/09/19 16:28:09 christos Exp $ # fsav: file(1) magic for datafellows fsav virus definition files # Anthon van der Neut (anthon@mnt.org) diff --git a/contrib/file/Magdir/fusecompress b/contrib/file/Magdir/fusecompress new file mode 100644 index 000000000000..165cf3c7720a --- /dev/null +++ b/contrib/file/Magdir/fusecompress @@ -0,0 +1,12 @@ + +#------------------------------------------------------------------------------ +# $File: fusecompress,v 1.2 2011/08/08 09:05:55 christos Exp $ +# fusecompress: file(1) magic for fusecompress +0 string \037\135\211 FuseCompress(ed) data +>3 byte 0x00 (none format) +>3 byte 0x01 (bz2 format) +>3 byte 0x02 (gz format) +>3 byte 0x03 (lzo format) +>3 byte 0x04 (xor format) +>3 byte >0x04 (unknown format) +>4 long x uncompressed size: %d diff --git a/contrib/file/Magdir/games b/contrib/file/Magdir/games index 32ccdfeaab0c..e8116289cb5e 100644 --- a/contrib/file/Magdir/games +++ b/contrib/file/Magdir/games @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: games,v 1.13 2012/02/13 22:50:50 christos Exp $ # games: file(1) for games # Fabio Bonelli @@ -33,6 +35,7 @@ # Quake 0 string PACK Quake I or II world or extension +>8 lelong >0 \b, %d entries #0 string -1\x0a Quake I demo #>30 string x version %.4s @@ -152,6 +155,11 @@ 0 string =PWAD doom patch PWAD data >4 lelong x containing %d lumps +# Build engine group files (Duke Nukem, Shadow Warrior, ...) 
+# Extension: .grp +# Created by: "Ganael Laplanche" +0 string KenSilverman Build engine group file +>12 lelong x containing %d files # Summary: Warcraft 3 save # Extension: .w3g @@ -172,7 +180,7 @@ # Modified by (1): Abel Cheung (regex, more game format) # FIXME: Some games don't have GM (game type) 0 regex \\(;.*GM\\[[0-9]{1,2}\\] Smart Game Format ->2 search/0x200 GM[ +>2 search/0x200/b GM[ >>&0 string 1] (Go) >>&0 string 2] (Othello) >>&0 string 3] (chess) @@ -214,13 +222,6 @@ >>&0 string 39] (Gipf) >>&0 string 40] (Kropki) - -# Summary: Civilization 4 video -# Extension: .bik -# Created by: Abel Cheung -0 string BIKi Civilization 4 Video - - ############################################## # NetImmerse/Gamebryo game engine entries @@ -243,3 +244,22 @@ >&0 string n\ NetImmerse game engine file >>&0 regex [0-9a-z.]+ \b, version %s +# Type: SGF Smart Game Format +# URL: http://www.red-bean.com/sgf/ +# From: Eduardo Sabbatella +2 regex/c \\(;.*GM\\[[0-9]{1,2}\\] Smart Game Format +>2 regex/c GM\\[1\\] - Go Game +>2 regex/c GM\\[6\\] - BackGammon Game +>2 regex/c GM\\[11\\] - Hex Game +>2 regex/c GM\\[18\\] - Amazons Game +>2 regex/c GM\\[19\\] - Octi Game +>2 regex/c GM\\[20\\] - Gess Game +>2 regex/c GM\\[21\\] - twix Game + +# Epic Games/Unreal Engine Package +# +0 lelong 0x9E2A83C1 Unreal Engine Package, +>4 leshort x version: %i +>12 lelong !0 \b, names: %i +>28 lelong !0 \b, imports: %i +>20 lelong !0 \b, exports: %i diff --git a/contrib/file/Magdir/gcc b/contrib/file/Magdir/gcc index ee726f3a5ade..893d0d91e692 100644 --- a/contrib/file/Magdir/gcc +++ b/contrib/file/Magdir/gcc @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: gcc,v 1.4 2009/09/19 16:28:09 christos Exp $ # gcc: file(1) magic for GCC special files # 0 string gpch GCC precompiled header diff --git a/contrib/file/Magdir/geo b/contrib/file/Magdir/geo new file mode 100644 index 000000000000..924c71e93562 --- /dev/null +++ b/contrib/file/Magdir/geo @@ -0,0 +1,105 @@ + +#------------------------------------------------------------------------------ +# $File: geo,v 1.1 2010/02/23 23:40:07 christos Exp $ +# Geo- files from Kurt Schwehr + +###################################################################### +# +# Acoustic Doppler Current Profilers (ADCP) +# +###################################################################### + +0 beshort 0x7f7f RDI Acoustic Doppler Current Profiler (ADCP) + +###################################################################### +# +# Metadata +# +###################################################################### + +0 string Identification_Information FGDC ASCII metadata + +###################################################################### +# +# Seimsic / Subbottom +# +###################################################################### + +# Knudsen subbottom chirp profiler - Binary File Format: B9 +# KEB D409-03167 V1.75 Huffman +0 string KEB\ Knudsen seismic KEL binary (KEB) - +>4 regex [-A-Z0-9]* Software: %s +>>&1 regex V[0-9]*\.[0-9]* version %s + +###################################################################### +# +# LIDAR - Laser altimetry or bathy +# +###################################################################### + + +# Caris LIDAR format for LADS comes as two parts... 
ascii location file and binary waveform data +0 string HCA LADS Caris Ascii Format (CAF) bathymetric lidar +>4 regex [0-9]*\.[0-9]* version %s + +0 string HCB LADS Caris Binary Format (CBF) bathymetric lidar waveform data +>3 byte x version %d . +>4 byte x %d + + +###################################################################### +# +# MULTIBEAM SONARS http://www.ldeo.columbia.edu/res/pi/MB-System/formatdoc/ +# +###################################################################### + +# GeoAcoustics - GeoSwath Plus +4 beshort 0x2002 GeoSwath RDF +0 string Start:- GeoSwatch auf text file + +# Seabeam 2100 +# mbsystem code mb41 +0 string SB2100 SeaBeam 2100 multibeam sonar +0 string SB2100DR SeaBeam 2100 DR multibeam sonar +0 string SB2100PR SeaBeam 2100 PR multibeam sonar + +# This corresponds to MB-System format 94, L-3/ELAC/SeaBeam XSE vendor +# format. It is the format of our upgraded SeaBeam 2112 on R/V KNORR. +0 string $HSF XSE multibeam + +# mb121 http://www.saic.com/maritime/gsf/ +8 string GSF-v SAIC generic sensor format (GSF) sonar data, +>&0 regex [0-9]*\.[0-9]* version %s + +# MGD77 - http://www.ngdc.noaa.gov/mgg/dat/geodas/docs/mgd77.htm +# mb161 +9 string MGD77 MGD77 Header, Marine Geophysical Data Exchange Format + +# MBSystem processing caches the mbinfo output +1 string Swath\ Data\ File: mbsystem info cache + +# Caris John Hughes Clark format +0 string HDCS Caris multibeam sonar related data +1 string Start/Stop\ parameter\ header: Caris ASCII project summary + +###################################################################### +# +# Visualization and 3D modeling +# +###################################################################### + +# IVS - IVS3d.com Tagged Data Represetation +0 string %%\ TDR\ 2.0 IVS Fledermaus TDR file + +# http://www.ecma-international.org/publications/standards/Ecma-363.htm +# 3D in PDFs +0 string U3D ECMA-363, Universal 3D + +###################################################################### +# +# Support files +# +###################################################################### + +# https://midas.psi.ch/elog/ +0 string $@MID@$ elog journal entry diff --git a/contrib/file/Magdir/geos b/contrib/file/Magdir/geos index af1df7b71945..66c2bd1a2904 100644 --- a/contrib/file/Magdir/geos +++ b/contrib/file/Magdir/geos @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: geos,v 1.4 2009/09/19 16:28:09 christos Exp $ # GEOS files (Vidar Madsen, vidar@gimp.org) # semi-commonly used in embedded and handheld systems. 
0 belong 0xc745c153 GEOS diff --git a/contrib/file/Magdir/gimp b/contrib/file/Magdir/gimp index 674bbfba62e2..a360bd8e124b 100644 --- a/contrib/file/Magdir/gimp +++ b/contrib/file/Magdir/gimp @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: gimp,v 1.7 2010/09/20 18:55:20 rrt Exp $ # GIMP Gradient: file(1) magic for the GIMP's gradient data files # by Federico Mena @@ -10,6 +12,7 @@ # ('Bucky' LaDieu, nega@vt.edu) 0 string gimp\ xcf GIMP XCF image data, +!:mime image/x-xcf >9 string file version 0, >9 string v version >>10 string >\0 %s, diff --git a/contrib/file/Magdir/gnome-keyring b/contrib/file/Magdir/gnome-keyring index 80a4f882c167..463688fe7dcb 100644 --- a/contrib/file/Magdir/gnome-keyring +++ b/contrib/file/Magdir/gnome-keyring @@ -1,3 +1,6 @@ + +#------------------------------------------------------------------------------ +# $File: gnome-keyring,v 1.2 2009/09/19 16:28:09 christos Exp $ # GNOME keyring # Contributed by Josh Triplett # FIXME: Could be simplified if pstring supported two-byte counts diff --git a/contrib/file/Magdir/gnu b/contrib/file/Magdir/gnu index 66c670c55fa7..4789c0a2a667 100644 --- a/contrib/file/Magdir/gnu +++ b/contrib/file/Magdir/gnu @@ -1,17 +1,28 @@ + #------------------------------------------------------------------------------ +# $File: gnu,v 1.13 2012/01/03 17:16:54 christos Exp $ # gnu: file(1) magic for various GNU tools # # GNU nlsutils message catalog file format # +# GNU message catalog (.mo and .gmo files) + 0 string \336\22\4\225 GNU message catalog (little endian), ->4 lelong x revision %d, ->8 lelong x %d messages +>6 leshort x revision %d. +>4 leshort >0 \b%d, +>>8 lelong x %d messages, +>>36 lelong x %d sysdep messages +>4 leshort =0 \b%d, +>>8 lelong x %d messages + 0 string \225\4\22\336 GNU message catalog (big endian), ->4 belong x revision %d, ->8 belong x %d messages -# message catalogs, from Mitchum DSouza -0 string *nazgul* Nazgul style compiled message catalog ->8 lelong >0 \b, version %ld +>4 beshort x revision %d. 
+>6 beshort >0 \b%d, +>>8 belong x %d messages, +>>36 belong x %d sysdep messages +>6 beshort =0 \b%d, +>>8 belong x %d messages + # GnuPG # The format is very similar to pgp @@ -40,3 +51,7 @@ # Files produced by GNU gettext 0 long 0xDE120495 GNU-format message catalog data 0 long 0x950412DE GNU-format message catalog data + +# gettext message catalogue +0 regex \^msgid\ GNU gettext message catalogue text +!:mime text/x-po diff --git a/contrib/file/Magdir/gnumeric b/contrib/file/Magdir/gnumeric index 76dfa90684eb..928ad3eed12a 100644 --- a/contrib/file/Magdir/gnumeric +++ b/contrib/file/Magdir/gnumeric @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: gnumeric,v 1.4 2009/09/19 16:28:09 christos Exp $ # gnumeric: file(1) magic for Gnumeric spreadsheet # This entry is only semi-helpful, as Gnumeric compresses its files, so # they will ordinarily reported as "compressed", but at least -z helps diff --git a/contrib/file/Magdir/grace b/contrib/file/Magdir/grace index a5f143359ec3..25bd759edca7 100644 --- a/contrib/file/Magdir/grace +++ b/contrib/file/Magdir/grace @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: grace,v 1.4 2009/09/19 16:28:09 christos Exp $ # ACE/gr and Grace type files - PLEASE DO NOT REMOVE THIS LINE # # ACE/gr binary diff --git a/contrib/file/Magdir/graphviz b/contrib/file/Magdir/graphviz index 831a00259506..b944d4637c6b 100644 --- a/contrib/file/Magdir/graphviz +++ b/contrib/file/Magdir/graphviz @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: graphviz,v 1.7 2009/09/19 16:28:09 christos Exp $ # graphviz: file(1) magic for http://www.graphviz.org/ # FIXME: These patterns match too generally. For example, the first diff --git a/contrib/file/Magdir/gringotts b/contrib/file/Magdir/gringotts index 6e833a3e1d1b..2bfef1b7f7de 100644 --- a/contrib/file/Magdir/gringotts +++ b/contrib/file/Magdir/gringotts @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: gringotts,v 1.5 2009/09/19 16:28:09 christos Exp $ # gringotts: file(1) magic for Gringotts # http://devel.pluto.linux.it/projects/Gringotts/ # author: Germano Rizzo diff --git a/contrib/file/Magdir/guile b/contrib/file/Magdir/guile new file mode 100644 index 000000000000..7ee0c48b2c8b --- /dev/null +++ b/contrib/file/Magdir/guile @@ -0,0 +1,13 @@ + +#------------------------------------------------------------------------------ +# $File: guile,v 1.1 2011/12/16 17:44:33 christos Exp $ +# Guile file magic from +# http://www.gnu.org/s/guile/ +# http://git.savannah.gnu.org/gitweb/?p=guile.git;f=libguile/_scm.h;hb=HEAD#l250 + +0 string GOOF---- Guile Object +>8 string LE \b, little endian +>8 string BE \b, big endian +>11 string 4 \b, 32bit +>11 string 8 \b, 64bit +>13 regex .\.. 
\b, bytecode v%s diff --git a/contrib/file/Magdir/hitachi-sh b/contrib/file/Magdir/hitachi-sh index a096eebf1f19..96067e9ba524 100644 --- a/contrib/file/Magdir/hitachi-sh +++ b/contrib/file/Magdir/hitachi-sh @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: hitachi-sh,v 1.5 2009/09/19 16:28:09 christos Exp $ # hitach-sh: file(1) magic for Hitachi Super-H # # Super-H COFF diff --git a/contrib/file/Magdir/hp b/contrib/file/Magdir/hp index 2d064cc9971a..3201c1567c8a 100644 --- a/contrib/file/Magdir/hp +++ b/contrib/file/Magdir/hp @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: hp,v 1.23 2009/09/19 16:28:09 christos Exp $ # hp: file(1) magic for Hewlett Packard machines (see also "printer") # # XXX - somebody should figure out whether any byte order needs to be diff --git a/contrib/file/Magdir/human68k b/contrib/file/Magdir/human68k index d8070f725d04..b3d66ce5d089 100644 --- a/contrib/file/Magdir/human68k +++ b/contrib/file/Magdir/human68k @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: human68k,v 1.5 2009/09/19 16:28:09 christos Exp $ # human68k: file(1) magic for Human68k (X680x0 DOS) binary formats # Magic too short! #0 string HU Human68k diff --git a/contrib/file/Magdir/ibm370 b/contrib/file/Magdir/ibm370 index 8cd9da27ae6a..37d17bd8a549 100644 --- a/contrib/file/Magdir/ibm370 +++ b/contrib/file/Magdir/ibm370 @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: ibm370,v 1.8 2009/09/19 16:28:09 christos Exp $ # ibm370: file(1) magic for IBM 370 and compatibles. # # "ibm370" said that 0x15d == 0535 was "ibm 370 pure executable". diff --git a/contrib/file/Magdir/ibm6000 b/contrib/file/Magdir/ibm6000 index 6870c3121ade..72755fafc498 100644 --- a/contrib/file/Magdir/ibm6000 +++ b/contrib/file/Magdir/ibm6000 @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: ibm6000,v 1.9 2009/09/19 16:28:09 christos Exp $ # ibm6000: file(1) magic for RS/6000 and the RT PC. 
# 0 beshort 0x01df executable (RISC System/6000 V3.1) or obj module diff --git a/contrib/file/Magdir/iff b/contrib/file/Magdir/iff index 4d2a832c6e95..b991ab749306 100644 --- a/contrib/file/Magdir/iff +++ b/contrib/file/Magdir/iff @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: iff,v 1.13 2011/09/06 11:00:06 christos Exp $ # iff: file(1) magic for Interchange File Format (see also "audio" & "images") # # Daniel Quinlan (quinlan@yggdrasil.com) -- IFF was designed by Electronic @@ -51,6 +52,13 @@ >8 string AMFF \b, AMFF AmigaMetaFile format >8 string WZRD \b, WZRD StormWIZARD resource >8 string DOC\ \b, DOC desktop publishing document +>8 string WVQA \b, Westwood Studios VQA Multimedia, +>>24 leshort x %d video frames, +>>26 leshort x %d x +>>28 leshort x %d +>8 string MOVE \b, Wing Commander III Video +>>12 string _PC_ \b, PC version +>>12 string 3DO_ \b, 3DO version # These go at the end of the iff rules # diff --git a/contrib/file/Magdir/images b/contrib/file/Magdir/images index 7eacf862fa04..1d4a0232a902 100644 --- a/contrib/file/Magdir/images +++ b/contrib/file/Magdir/images @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: images,v 1.72 2011/12/08 12:12:46 rrt Exp $ # images: file(1) magic for image formats (see also "iff", and "c-lang" for # XPM bitmaps) # @@ -32,7 +34,7 @@ # The next byte following the magic is always whitespace. 0 search/1 P1 Netpbm PBM image text !:mime image/x-portable-bitmap -0 search/1 P2 Netpbm PGM image text +0 search/1b P2 Netpbm PGM image text !:mime image/x-portable-greymap 0 search/1 P3 Netpbm PPM image text !:mime image/x-portable-pixmap @@ -61,6 +63,25 @@ 0 string IIN1 NIFF image data !:mime image/x-niff +# Canon RAW version 1 (CRW) files are a type of Canon Image File Format +# (CIFF) file. These are apparently all little-endian. +# From: Adam Buchbinder +# URL: http://www.sno.phy.queensu.ca/~phil/exiftool/canon_raw.html +0 string II\x1a\0\0\0HEAPCCDR Canon CIFF raw image data +!:mime image/x-canon-crw +>16 leshort x \b, version %d. +>14 leshort x \b%d + +# Canon RAW version 2 (CR2) files are a kind of TIFF with an extra magic +# number. Put this above the TIFF test to make sure we detect them. +# These are apparently all little-endian. +# From: Adam Buchbinder +# URL: http://libopenraw.freedesktop.org/wiki/Canon_CR2 +0 string II\x2a\0\x10\0\0\0CR Canon CR2 raw image data +!:mime image/x-canon-cr2 +>10 byte x \b, version %d. +>11 byte x \b%d + # Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com) # The second word of TIFF files is the TIFF version number, 42, which has # never changed. The TIFF specification recommends testing for it. @@ -69,13 +90,18 @@ 0 string II\x2a\x00 TIFF image data, little-endian !:mime image/tiff +0 string MM\x00\x2b Big TIFF image data, big-endian +!:mime image/tiff +0 string II\x2b\x00 Big TIFF image data, little-endian +!:mime image/tiff + # PNG [Portable Network Graphics, or "PNG's Not GIF"] images # (Greg Roelofs, newt@uchicago.edu) # (Albert Cahalan, acahalan@cs.uml.edu) # # 137 P N G \r \n ^Z \n [4-byte length] H E A D [HEAD data] [HEAD crc] ... 
# -0 string \x89PNG\x0d\x0a\x1a\x0a PNG image +0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data !:mime image/png >16 belong x \b, %ld x >20 belong x %ld, @@ -207,8 +233,8 @@ #0 string BA PC bitmap array data # XPM icons (Greg Roelofs, newt@uchicago.edu) -# note possible collision with C/REXX entry in c-lang; currently commented out 0 search/1 /*\ XPM\ */ X pixmap image text +!:mime image/x-xpmi # Utah Raster Toolkit RLE images (janl@ifi.uio.no) 0 leshort 0xcc52 RLE image data, @@ -308,11 +334,20 @@ # As described in /usr/X11R6/include/X11/XWDFile.h # used by the xwd program. # Bradford Castalia, idaeim, 1/01 -4 belong 7 XWD X Window Dump image data ->100 string >\0 \b, "%s" ->16 belong x \b, %dx ->20 belong x \b%dx ->12 belong x \b%d +# updated by Adam Buchbinder, 2/09 +# The following assumes version 7 of the format; the first long is the length +# of the header, which is at least 25 4-byte longs, and the one at offset 8 +# is a constant which is always either 1 or 2. Offset 12 is the pixmap depth, +# which is a maximum of 32. +0 belong >100 +>8 belong <3 +>>12 belong <33 +>>>4 belong 7 XWD X Window Dump image data +!:mime image/x-xwindowdump +>>>>100 string >\0 \b, "%s" +>>>>16 belong x \b, %dx +>>>>20 belong x \b%dx +>>>>12 belong x \b%d # PDS - Planetary Data System # These files use Parameter Value Language in the header section. @@ -400,8 +435,28 @@ >2 byte 1 RLE compressed # Adobe Photoshop +# From: Asbjoern Sloth Toennesen 0 string 8BPS Adobe Photoshop Image !:mime image/vnd.adobe.photoshop +>4 beshort 2 (PSB) +>18 belong x \b, %d x +>14 belong x %d, +>24 beshort 0 bitmap +>24 beshort 1 grayscale +>>12 beshort 2 with alpha +>24 beshort 2 indexed +>24 beshort 3 RGB +>>12 beshort 4 \bA +>24 beshort 4 CMYK +>>12 beshort 5 \bA +>24 beshort 7 multichannel +>24 beshort 8 duotone +>24 beshort 9 lab +>12 beshort > 1 +>>12 beshort x \b, %dx +>12 beshort 1 \b, +>22 beshort x %d-bit channel +>12 beshort > 1 \bs # XV thumbnail indicator (ThMO) 0 string P7\ 332 XV thumbnail image data @@ -529,11 +584,16 @@ # Bio-Rad .PIC is an image format used by microscope control systems # and related image processing software used by biologists. # From: Vebjorn Ljosa -54 leshort 12345 Bio-Rad .PIC Image File ->0 leshort >0 %hd x ->2 leshort >0 %hd, ->4 leshort =1 1 image in file ->4 leshort >1 %hd images in file +# BOOL values are two-byte integers; use them to rule out false positives. 
+# http://web.archive.org/web/20050317223257/www.cs.ubc.ca/spider/ladic/text/biorad.txt +# Samples: http://www.loci.wisc.edu/software/sample-data +14 leshort <2 +>62 leshort <2 +>>54 leshort 12345 Bio-Rad .PIC Image File +>>>0 leshort >0 %hd x +>>>2 leshort >0 %hd, +>>>4 leshort =1 1 image in file +>>>4 leshort >1 %hd images in file # From Jan "Yenya" Kasprzak # The description of *.mrw format can be found at @@ -546,11 +606,14 @@ # Submitted by: Stephane Loeuillet # Modified by (1): Abel Cheung 0 string AT&TFORM -!:mime image/vnd.djvu >12 string DJVM DjVu multiple page document +!:mime image/vnd.djvu >12 string DJVU DjVu image or single page document +!:mime image/vnd.djvu >12 string DJVI DjVu shared document +!:mime image/vnd.djvu >12 string THUM DjVu page thumbnails +!:mime image/vnd.djvu # From Marc Espie @@ -565,7 +628,7 @@ # specifications at http://hdf.ncsa.uiuc.edu/ 0 belong 0x0e031301 Hierarchical Data Format (version 4) data !:mime application/x-hdf -0 string \211HDF\r\n\032 Hierarchical Data Format (version 5) data +0 string \211HDF\r\n\032\n Hierarchical Data Format (version 5) data !:mime application/x-hdf # From: Tobias Burnus @@ -605,3 +668,68 @@ # JPEG 2000 Code Stream Bitmap # From Petr Splichal 0 string \xFF\x4F\xFF\x51\x00 JPEG-2000 Code Stream Bitmap data + +# From: Rick Richardson +0 string GARMIN\ BITMAP\ 01 Garmin Bitmap file + +# Type: Ulead Photo Explorer5 (.pe5) +# URL: http://www.jisyo.com/cgibin/view.cgi?EXT=pe5 (Japanese) +# From: Simon Horman +0 string IIO2H Ulead Photo Explorer5 + +# Type: X11 cursor +# URL: http://webcvs.freedesktop.org/mime/shared-mime-info/freedesktop.org.xml.in?view=markup +# From: Mathias Brodala +0 string Xcur X11 cursor + +# Type: Olympus ORF raw images. +# URL: http://libopenraw.freedesktop.org/wiki/Olympus_ORF +# From: Adam Buchbinder +0 string MMOR Olympus ORF raw image data, big-endian +!:mime image/x-olympus-orf +0 string IIRO Olympus ORF raw image data, little-endian +!:mime image/x-olympus-orf +0 string IIRS Olympus ORF raw image data, little-endian +!:mime image/x-olympus-orf + +# Type: files used in modern AVCHD camcoders to store clip information +# Extension: .cpi +# From: Alexander Danilov +0 string HDMV0100 AVCHD Clip Information + +# From: Adam Buchbinder +# URL: http://local.wasp.uwa.edu.au/~pbourke/dataformats/pic/ +# Radiance HDR; usually has .pic or .hdr extension. +0 string #?RADIANCE\n Radiance HDR image data +#!mime image/vnd.radiance + +# From: Adam Buchbinder +# URL: http://www.mpi-inf.mpg.de/resources/pfstools/pfs_format_spec.pdf +# Used by the pfstools packages. The regex matches for the image size could +# probably use some work. The MIME type is made up; if there's one in +# actual common use, it should replace the one below. +0 string PFS1\x0a PFS HDR image data +#!mime image/x-pfs +>1 regex [0-9]*\ \b, %s +>>1 regex \ [0-9]{4} \bx%s + +# Type: Foveon X3F +# URL: http://www.photofo.com/downloads/x3f-raw-format.pdf +# From: Adam Buchbinder +# Note that the MIME type isn't defined anywhere that I can find; if +# there's a canonical type for this format, it should replace this one. +0 string FOVb Foveon X3F raw image data +!:mime image/x-x3f +>6 leshort x \b, version %d. +>4 leshort x \b%d +>28 lelong x \b, %dx +>32 lelong x \b%d + +# Paint.NET file +# From Adam Buchbinder +0 string PDN3 Paint.NET image data +!:mime image/x-paintnet + +# Not really an image. +# From: "Tano M. 
Fotang" +0 string \x46\x4d\x52\x00 ISO/IEC 19794-2 Format Minutiae Record (FMR) diff --git a/contrib/file/Magdir/inform b/contrib/file/Magdir/inform index ba35d61e046c..fe518ece9160 100644 --- a/contrib/file/Magdir/inform +++ b/contrib/file/Magdir/inform @@ -1,8 +1,9 @@ #------------------------------------------------------------------------------ +# $File: inform,v 1.5 2009/09/19 16:28:09 christos Exp $ # inform: file(1) magic for Inform interactive fiction language # URL: http://www.inform-fiction.org/ # From: Reuben Thomas -0 search/cB/100 constant\ story Inform source text +0 search/100/cW constant\ story Inform source text diff --git a/contrib/file/Magdir/intel b/contrib/file/Magdir/intel index 00942c2b2e0a..47812a0ea351 100644 --- a/contrib/file/Magdir/intel +++ b/contrib/file/Magdir/intel @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: intel,v 1.10 2011/03/30 19:51:00 christos Exp $ # intel: file(1) magic for x86 Unix # # Various flavors of x86 UNIX executable/object (other than Xenix, which @@ -45,3 +46,8 @@ >28 string Adaptec Adaptec >42 string PROMISE Promise >2 byte x (%d*512) + +# Flash descriptors for Intel SPI flash roms. +# From Dr. Jesus +0 lelong 0x0ff0a55a Intel serial flash for ICH/PCH ROM <= 5 or 3400 series A-step +16 lelong 0x0ff0a55a Intel serial flash for PCH ROM diff --git a/contrib/file/Magdir/interleaf b/contrib/file/Magdir/interleaf index 3eea3cff2690..8e3aaf57da5e 100644 --- a/contrib/file/Magdir/interleaf +++ b/contrib/file/Magdir/interleaf @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: interleaf,v 1.10 2009/09/19 16:28:10 christos Exp $ # interleaf: file(1) magic for InterLeaf TPS: # 0 string =\210OPS Interleaf saved data diff --git a/contrib/file/Magdir/island b/contrib/file/Magdir/island index 9903cddf9217..f40521a0367f 100644 --- a/contrib/file/Magdir/island +++ b/contrib/file/Magdir/island @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: island,v 1.5 2009/09/19 16:28:10 christos Exp $ # island: file(1) magic for IslandWite/IslandDraw, from SunOS 5.5.1 # "/etc/magic": # From: guy@netapp.com (Guy Harris) diff --git a/contrib/file/Magdir/ispell b/contrib/file/Magdir/ispell index 592f064ed63c..57a6e9e78988 100644 --- a/contrib/file/Magdir/ispell +++ b/contrib/file/Magdir/ispell @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: ispell,v 1.8 2009/09/19 16:28:10 christos Exp $ # ispell: file(1) magic for ispell # # Ispell 3.0 has a magic of 0x9601 and ispell 3.1 has 0x9602. This magic diff --git a/contrib/file/Magdir/isz b/contrib/file/Magdir/isz new file mode 100644 index 000000000000..316bbd4acf69 --- /dev/null +++ b/contrib/file/Magdir/isz @@ -0,0 +1,15 @@ + +#------------------------------------------------------------------------------ +# $File: isz,v 1.1 2010/03/27 16:17:09 christos Exp $ +# ISO Zipped file format +# http://www.ezbsystems.com/isz/iszspec.txt +0 string IsZ! 
ISO Zipped file +>4 byte x \b, header size %u +>5 byte x \b, version %u +>8 lelong x \b, serial %u +#12 leshort x \b, sector size %u +#>16 lelong x \b, total sectors %u +>17 byte >0 \b, password protected +#>24 lequad x \b, segment size %llu +#>32 lelong x \b, blocks %u +#>36 lelong x \b, block size %u diff --git a/contrib/file/Magdir/java b/contrib/file/Magdir/java index cca5542c1f69..0aca86a5e19a 100644 --- a/contrib/file/Magdir/java +++ b/contrib/file/Magdir/java @@ -1,4 +1,6 @@ + #------------------------------------------------------------ +# $File: java,v 1.13 2011/12/08 12:12:46 rrt Exp $ # Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the # same magic number, 0xcafebabe, so they are both handled # in the entry called "cafebabe". @@ -22,3 +24,6 @@ >0 regex dey\n[0-9][0-9][0-9]\0 Dalvik dex file (optimized for host) >4 string >000 version %s +# Java source +0 regex ^import.*;$ Java source +!:mime text/x-java diff --git a/contrib/file/Magdir/jpeg b/contrib/file/Magdir/jpeg index 4470be4adf9a..7814245add34 100644 --- a/contrib/file/Magdir/jpeg +++ b/contrib/file/Magdir/jpeg @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: jpeg,v 1.16 2011/01/04 19:29:32 rrt Exp $ # JPEG images # SunOS 5.5.1 had # @@ -126,13 +127,8 @@ # And if there was some sort of looping construct to do searches, plus a few # named accumulators, it would be even more effective... # At least we can show a comment if no other segments got inserted before: ->(4.S+5) byte 0xFE ->>(4.S+8) string >\0 \b, comment: "%s" -# FIXME: When we can do non-byte counted strings, we can use that to get -# the string's count, and fix Debian bug #283760 -#>(4.S+5) byte 0xFE \b, comment -#>>(4.S+6) beshort x \b length=%d -#>>(4.S+8) string >\0 \b, "%s" +>(4.S+5) byte 0xFE \b, comment: +>>(4.S+6) pstring/HJ x "%s" # Or, we can show the encoding type (I've included only the three most common) # and image dimensions if we are lucky and the SOFn (image segment) is here: >(4.S+5) byte 0xC0 \b, baseline diff --git a/contrib/file/Magdir/karma b/contrib/file/Magdir/karma index 89e77727ea4e..007a4b74b220 100644 --- a/contrib/file/Magdir/karma +++ b/contrib/file/Magdir/karma @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: karma,v 1.6 2009/09/19 16:28:10 christos Exp $ # karma: file(1) magic for Karma data files # # From diff --git a/contrib/file/Magdir/kde b/contrib/file/Magdir/kde index d81ee693fd5e..dda5819a9bad 100644 --- a/contrib/file/Magdir/kde +++ b/contrib/file/Magdir/kde @@ -1,10 +1,11 @@ #------------------------------------------------------------------------------ +# $File: kde,v 1.5 2010/11/25 15:00:12 christos Exp $ # kde: file(1) magic for KDE -0 string [KDE\ Desktop\ Entry] KDE desktop entry +0 string/t [KDE\ Desktop\ Entry] KDE desktop entry !:mime application/x-kdelnk -0 string #\ KDE\ Config\ File KDE config file +0 string/t #\ KDE\ Config\ File KDE config file !:mime application/x-kdelnk -0 string #\ xmcd xmcd database file for kscd +0 string/t #\ xmcd xmcd database file for kscd !:mime text/x-xmcd diff --git a/contrib/file/Magdir/kml b/contrib/file/Magdir/kml index 5b59b9e55501..ed0f42ed8533 100644 --- a/contrib/file/Magdir/kml +++ b/contrib/file/Magdir/kml @@ -1,10 +1,12 @@ + #------------------------------------------------------------------------------ +# $File: kml,v 1.3 2010/11/25 15:00:12 christos Exp $ # Type: Google KML, formerly Keyhole Markup Language # Future development of this format has been 
handed # over to the Open Geospatial Consortium. # http://www.opengeospatial.org/standards/kml/ # From: Asbjoern Sloth Toennesen -0 string \20 search/400 \ xmlns= >>&0 regex ['"]http://earth.google.com/kml Google KML document !:mime application/vnd.google-earth.kml+xml @@ -20,7 +22,7 @@ # From: Asbjoern Sloth Toennesen >>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document !:mime application/vnd.google-earth.kml+xml ->>>&1 string 2.2 \b, version 2.2 +>>>&1 string/t 2.2 \b, version 2.2 #------------------------------------------------------------------------------ # Type: Google KML Archive (ZIP based) diff --git a/contrib/file/Magdir/lecter b/contrib/file/Magdir/lecter index 87c186bab363..6ae87c12c0a6 100644 --- a/contrib/file/Magdir/lecter +++ b/contrib/file/Magdir/lecter @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: lecter,v 1.4 2009/09/19 16:28:10 christos Exp $ # DEC SRC Virtual Paper: Lectern files # Karl M. Hegbloom 0 string lect DEC SRC Virtual Paper Lectern file diff --git a/contrib/file/Magdir/lex b/contrib/file/Magdir/lex index eae9b107c231..cc9fac5e1fa7 100644 --- a/contrib/file/Magdir/lex +++ b/contrib/file/Magdir/lex @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: lex,v 1.6 2009/09/19 16:28:10 christos Exp $ # lex: file(1) magic for lex # # derived empirically, your offsets may vary! diff --git a/contrib/file/Magdir/lif b/contrib/file/Magdir/lif index cf20e4997d83..a7a0a8abe776 100644 --- a/contrib/file/Magdir/lif +++ b/contrib/file/Magdir/lif @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: lif,v 1.8 2009/09/19 16:28:10 christos Exp $ # lif: file(1) magic for lif # # (Daniel Quinlan ) diff --git a/contrib/file/Magdir/linux b/contrib/file/Magdir/linux index 83d4305d0a1f..8d4c60adfd9e 100644 --- a/contrib/file/Magdir/linux +++ b/contrib/file/Magdir/linux @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: linux,v 1.42 2012/02/07 21:35:03 christos Exp $ # linux: file(1) magic for Linux files # # Values for Linux/i386 binaries, from Daniel Quinlan @@ -43,38 +45,49 @@ # this can be overridden by the DOS executable (COM) entry 2 string LILO Linux/i386 LILO boot/chain loader # +# Linux make config build file, from Ole Aamot +28 string make\ config Linux make config build file +# # PSF fonts, from H. 
Peter Anvin -0 leshort 0x0436 Linux/i386 PC Screen Font data, ->2 byte 0 256 characters, no directory, ->2 byte 1 512 characters, no directory, ->2 byte 2 256 characters, Unicode directory, ->2 byte 3 512 characters, Unicode directory, +# Updated by Adam Buchbinder +# See: http://www.win.tue.nl/~aeb/linux/kbd/font-formats-1.html +0 leshort 0x0436 Linux/i386 PC Screen Font v1 data, +>2 byte&0x01 0 256 characters, +>2 byte&0x01 !0 512 characters, +>2 byte&0x02 0 no directory, +>2 byte&0x02 !0 Unicode directory, >3 byte >0 8x%d +0 string \x72\xb5\x4a\x86\x00\x00 Linux/i386 PC Screen Font v2 data, +>16 lelong x %d characters, +>12 lelong&0x01 0 no directory, +>12 lelong&0x01 !0 Unicode directory, +>24 lelong x %d +>28 lelong x \bx%d + # Linux swap file, from Daniel Quinlan 4086 string SWAP-SPACE Linux/i386 swap file # From: Jeff Bailey # Linux swap file with swsusp1 image, from Jeff Bailey 4076 string SWAPSPACE2S1SUSPEND Linux/i386 swap file (new style) with SWSUSP1 image +# From: James Hunt +4076 string SWAPSPACE2LINHIB0001 Linux/i386 swap file (new style) (compressed hibernate) # according to man page of mkswap (8) March 1999 -4086 string SWAPSPACE2 Linux/i386 swap file (new style) ->0x400 long x %d (4K pages) ->0x404 long x size %d pages ->>4086 string SWAPSPACE2 ->>>1052 string >\0 Label %s -# ECOFF magic for OSF/1 and Linux (only tested under Linux though) -# -# from Erik Troan (ewt@redhat.com) examining od dumps, so this -# could be wrong -# updated by David Mosberger (davidm@azstarnet.com) based on -# GNU BFD and MIPS info found below. -# -0 leshort 0x0183 ECOFF alpha ->24 leshort 0407 executable ->24 leshort 0410 pure ->24 leshort 0413 demand paged ->8 long >0 not stripped ->8 long 0 stripped ->23 leshort >0 - version %ld. +# volume label and UUID Russell Coker +# http://etbe.coker.com.au/2008/07/08/label-vs-uuid-vs-device/ +4086 string SWAPSPACE2 Linux/i386 swap file (new style), +>0x400 long x version %d (4K pages), +>0x404 long x size %d pages, +>1052 string \0 no label, +>1052 string >\0 LABEL=%s, +>0x40c belong x UUID=%08x +>0x410 beshort x \b-%04x +>0x412 beshort x \b-%04x +>0x414 beshort x \b-%04x +>0x416 belong x \b-%08x +>0x41a beshort x \b%04x +# From Daniel Novotny +# swap file for PowerPC +65526 string SWAPSPACE2 Linux/ppc swap file # # Linux kernel boot images, from Albert Cahalan # and others such as Axel Kohlmeyer @@ -153,6 +166,27 @@ >0x1e6 belong 0x454c4b53 ELKS Kernel >0x1e6 belong !0x454c4b53 style boot sector +############################################################################ +# Linux S390 kernel image +# Created by: Jan Kaluza +8 string \x02\x00\x00\x18\x60\x00\x00\x50\x02\x00\x00\x68\x60\x00\x00\x50\x40\x40\x40\x40\x40\x40\x40\x40 Linux S390 +>0x00010000 search/b/4096 \x00\x0a\x00\x00\x8b\xad\xcc\xcc +# 64bit +>>&0 string \xc1\x00\xef\xe3\xf0\x68\x00\x00 Z10 64bit kernel +>>&0 string \xc1\x00\xef\xc3\x00\x00\x00\x00 Z9-109 64bit kernel +>>&0 string \xc0\x00\x20\x00\x00\x00\x00\x00 Z990 64bit kernel +>>&0 string \x00\x00\x00\x00\x00\x00\x00\x00 Z900 64bit kernel +# 32bit +>>&0 string \x81\x00\xc8\x80\x00\x00\x00\x00 Z10 32bit kernel +>>&0 string \x81\x00\xc8\x80\x00\x00\x00\x00 Z9-109 32bit kernel +>>&0 string \x80\x00\x20\x00\x00\x00\x00\x00 Z990 32bit kernel +>>&0 string \x80\x00\x00\x00\x00\x00\x00\x00 Z900 32bit kernel + +# Linux ARM compressed kernel image +# From: Kevin Cernekee +36 lelong 0x016f2818 Linux kernel ARM boot executable zImage (little-endian) +36 belong 0x016f2818 Linux kernel ARM boot executable zImage (big-endian) + 
############################################################################ # Linux 8086 executable 0 lelong&0xFF0000FF 0xC30000E9 Linux-Dev86 executable, headerless @@ -217,19 +251,55 @@ # # 0x200 seems to be the common case -0x218 string LVM2\ 001 LVM2 (Linux Logical Volume Manager) +0x218 string LVM2\ 001 LVM2 PV (Linux Logical Volume Manager) # read the offset to add to the start of the header, and the header # start in 0x200 ->(0x214.l+0x200) string >\0 , UUID: %s +>&(&-12.l-0x21) byte x +# display UUID in LVM format + display all 32 bytes (instead of max string length: 31) +>>&0x0 string >\x2f \b, UUID: %.6s +>>&0x6 string >\x2f \b-%.4s +>>&0xa string >\x2f \b-%.4s +>>&0xe string >\x2f \b-%.4s +>>&0x12 string >\x2f \b-%.4s +>>&0x16 string >\x2f \b-%.4s +>>&0x1a string >\x2f \b-%.6s +>>&0x20 lequad x \b, size: %lld -0x018 string LVM2\ 001 LVM2 (Linux Logical Volume Manager) ->(0x014.l) string >\0 , UUID: %s +0x018 string LVM2\ 001 LVM2 PV (Linux Logical Volume Manager) +>&(&-12.l-0x21) byte x +# display UUID in LVM format + display all 32 bytes (instead of max string length: 31) +>>&0x0 string >\x2f \b, UUID: %.6s +>>&0x6 string >\x2f \b-%.4s +>>&0xa string >\x2f \b-%.4s +>>&0xe string >\x2f \b-%.4s +>>&0x12 string >\x2f \b-%.4s +>>&0x16 string >\x2f \b-%.4s +>>&0x1a string >\x2f \b-%.6s +>>&0x20 lequad x \b, size: %lld -0x418 string LVM2\ 001 LVM2 (Linux Logical Volume Manager) ->(0x414.l+0x400) string >\0 , UUID: %s +0x418 string LVM2\ 001 LVM2 PV (Linux Logical Volume Manager) +>&(&-12.l-0x21) byte x +# display UUID in LVM format + display all 32 bytes (instead of max string length: 31) +>>&0x0 string >\x2f \b, UUID: %.6s +>>&0x6 string >\x2f \b-%.4s +>>&0xa string >\x2f \b-%.4s +>>&0xe string >\x2f \b-%.4s +>>&0x12 string >\x2f \b-%.4s +>>&0x16 string >\x2f \b-%.4s +>>&0x1a string >\x2f \b-%.6s +>>&0x20 lequad x \b, size: %lld -0x618 string LVM2\ 001 LVM2 (Linux Logical Volume Manager) ->(0x614.l+0x600) string >\0 , UUID: %s +0x618 string LVM2\ 001 LVM2 PV (Linux Logical Volume Manager) +>&(&-12.l-0x21) byte x +# display UUID in LVM format + display all 32 bytes (instead of max string length: 31) +>>&0x0 string >\x2f \b, UUID: %.6s +>>&0x6 string >\x2f \b-%.4s +>>&0xa string >\x2f \b-%.4s +>>&0xe string >\x2f \b-%.4s +>>&0x12 string >\x2f \b-%.4s +>>&0x16 string >\x2f \b-%.4s +>>&0x1a string >\x2f \b-%.6s +>>&0x20 lequad x \b, size: %lld # LVM snapshot # from Jason Farrel @@ -262,3 +332,9 @@ >20 search/256 (name >>&1 string x (name %s) +# Type: Xen, the virtual machine monitor +# From: Radek Vokal +0 string LinuxGuestRecord Xen saved domain +#>2 regex \(name\ [^)]*\) %s +>20 search/256 (name (name +>>&1 string x %s...) 
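For context, the Magdir entries being updated above are compiled into the database that file(1) consults through libmagic(3). A minimal sketch of classifying a file against that database from C — assuming the standard magic_open()/magic_load()/magic_file() calls and linking with -lmagic — could look like the following; with the new linux entries in place, pointing it at an ARM zImage or an LVM2 physical volume would be expected to print the descriptions added above.

#include <stdio.h>
#include <stdlib.h>
#include <magic.h>	/* libmagic, the engine behind file(1) */

int
main(int argc, char *argv[])
{
	magic_t m;
	const char *desc;

	if (argc != 2) {
		fprintf(stderr, "usage: %s file\n", argv[0]);
		return (EXIT_FAILURE);
	}

	/* Open a handle and load the default compiled magic database. */
	m = magic_open(MAGIC_NONE);
	if (m == NULL || magic_load(m, NULL) == -1) {
		fprintf(stderr, "magic: %s\n",
		    m != NULL ? magic_error(m) : "open failed");
		return (EXIT_FAILURE);
	}

	/* Ask for a textual description, as file(1) would print it. */
	desc = magic_file(m, argv[1]);
	printf("%s: %s\n", argv[1], desc != NULL ? desc : magic_error(m));

	magic_close(m);
	return (EXIT_SUCCESS);
}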
diff --git a/contrib/file/Magdir/lisp b/contrib/file/Magdir/lisp index c6f31dae4238..110988020ecd 100644 --- a/contrib/file/Magdir/lisp +++ b/contrib/file/Magdir/lisp @@ -1,4 +1,6 @@ + #------------------------------------------------------------------------------ +# $File: lisp,v 1.23 2009/09/19 16:28:10 christos Exp $ # lisp: file(1) magic for lisp programs # # various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com) @@ -11,8 +13,6 @@ #>2 search/4096 !\r Lisp/Scheme program text #>2 search/4096 \r Windows INF file -0 search/4096 (if\ Lisp/Scheme program text -!:mime text/x-lisp 0 search/4096 (setq\ Lisp/Scheme program text !:mime text/x-lisp 0 search/4096 (defvar\ Lisp/Scheme program text diff --git a/contrib/file/Magdir/llvm b/contrib/file/Magdir/llvm index 8c1610cd10e6..44a40094036c 100644 --- a/contrib/file/Magdir/llvm +++ b/contrib/file/Magdir/llvm @@ -1,10 +1,13 @@ #------------------------------------------------------------------------------ +# $File: llvm,v 1.5 2010/09/20 18:55:20 rrt Exp $ # llvm: file(1) magic for LLVM byte-codes -# URL: http://llvm.cs.uiuc.edu/docs/BytecodeFormat.html#signature +# URL: http://llvm.org/docs/BitCodeFormat.html # From: Al Stone 0 string llvm LLVM byte-codes, uncompressed 0 string llvc0 LLVM byte-codes, null compression 0 string llvc1 LLVM byte-codes, gzip compression 0 string llvc2 LLVM byte-codes, bzip2 compression +0 string \xde\xc0\x17\x0b LLVM bitcode, wrapper +0 string BC\xc0\xde LLVM bitcode diff --git a/contrib/file/Magdir/lua b/contrib/file/Magdir/lua index 9aa87b159c22..61e69a63b547 100644 --- a/contrib/file/Magdir/lua +++ b/contrib/file/Magdir/lua @@ -1,12 +1,14 @@ + #------------------------------------------------------------------------------ +# $File: lua,v 1.5 2009/09/19 16:28:10 christos Exp $ # lua: file(1) magic for Lua scripting language # URL: http://www.lua.org/ # From: Reuben Thomas , Seo Sanghyeon # Lua scripts -0 search/1/b #!\ /usr/bin/lua Lua script text executable +0 search/1/w #!\ /usr/bin/lua Lua script text executable !:mime text/x-lua -0 search/1/b #!\ /usr/local/bin/lua Lua script text executable +0 search/1/w #!\ /usr/local/bin/lua Lua script text executable !:mime text/x-lua 0 search/1 #!/usr/bin/env\ lua Lua script text executable !:mime text/x-lua diff --git a/contrib/file/Magdir/luks b/contrib/file/Magdir/luks index 2ab23935ef69..6ecc40aff19a 100644 --- a/contrib/file/Magdir/luks +++ b/contrib/file/Magdir/luks @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: luks,v 1.4 2009/09/19 16:28:10 christos Exp $ # luks: file(1) magic for Linux Unified Key Setup # URL: http://luks.endorphin.org/spec # From: Anthon van der Neut diff --git a/contrib/file/Magdir/m4 b/contrib/file/Magdir/m4 new file mode 100644 index 000000000000..f6b5e52640f9 --- /dev/null +++ b/contrib/file/Magdir/m4 @@ -0,0 +1,6 @@ +#------------------------------------------------------------------------------ +# $File: m4,v 1.1 2011/12/08 12:12:46 rrt Exp $ +# make: file(1) magic for M4 scripts +# +0 regex \^dnl\ M4 macro processor script text +!:mime text/x-m4 diff --git a/contrib/file/Magdir/mach b/contrib/file/Magdir/mach index e53be0737d92..8d03f1ae4a28 100644 --- a/contrib/file/Magdir/mach +++ b/contrib/file/Magdir/mach @@ -1,4 +1,6 @@ + #------------------------------------------------------------ +# $File: mach,v 1.9 2009/09/19 16:28:10 christos Exp $ # Mach has two magic numbers, 0xcafebabe and 0xfeedface. 
# Unfortunately the first, cafebabe, is shared with # Java ByteCode, so they are both handled in the file "cafebabe". diff --git a/contrib/file/Magdir/macintosh b/contrib/file/Magdir/macintosh index ca665ded8cb8..b9933b1b4976 100644 --- a/contrib/file/Magdir/macintosh +++ b/contrib/file/Magdir/macintosh @@ -1,5 +1,6 @@ #------------------------------------------------------------------------------ +# $File: macintosh,v 1.22 2011/05/17 17:40:31 rrt Exp $ # macintosh description # # BinHex is the Macintosh ASCII-encoded file format (see also "apple") @@ -284,7 +285,7 @@ >0x412 beshort x number of blocks: %d, >0x424 pstring x volume name: %s -# "BD" is has many false positives +# "BD" gives many false positives #0x400 beshort 0x4244 Macintosh HFS data #>0 beshort 0x4C4B (bootable) #>0x40a beshort &0x8000 (locked) @@ -375,3 +376,15 @@ # From: Remi Mommsen 0 string BOMStore Mac OS X bill of materials (BOM) file + +# From: Adam Buchbinder +# URL: http://en.wikipedia.org/wiki/Datafork_TrueType +# Derived from the 'fondu' and 'ufond' source code (fondu.sf.net). 'sfnt' is +# TrueType; 'POST' is PostScript. 'FONT' and 'NFNT' sometimes appear, but I +# don't know what they mean. +0 belong 0x100 +>(0x4.L+24) beshort x +>>&4 belong 0x73666e74 Mac OSX datafork font, TrueType +>>&4 belong 0x464f4e54 Mac OSX datafork font, 'FONT' +>>&4 belong 0x4e464e54 Mac OSX datafork font, 'NFNT' +>>&4 belong 0x504f5354 Mac OSX datafork font, PostScript diff --git a/contrib/file/Magdir/magic b/contrib/file/Magdir/magic index 3bf4e2ee7acb..0de332aa3bfb 100644 --- a/contrib/file/Magdir/magic +++ b/contrib/file/Magdir/magic @@ -1,8 +1,9 @@ #------------------------------------------------------------------------------ +# $File: magic,v 1.10 2010/11/25 15:00:12 christos Exp $ # magic: file(1) magic for magic files # -0 string #\ Magic magic text file for file(1) cmd +0 string/t #\ Magic magic text file for file(1) cmd 0 lelong 0xF11E041C magic binary file for file(1) cmd >4 lelong x (version %d) (little endian) 0 belong 0xF11E041C magic binary file for file(1) cmd diff --git a/contrib/file/Magdir/mail.news b/contrib/file/Magdir/mail.news index 0b9d90a4e20a..f4bb1f55e90b 100644 --- a/contrib/file/Magdir/mail.news +++ b/contrib/file/Magdir/mail.news @@ -1,34 +1,36 @@ #------------------------------------------------------------------------------ +# $File: mail.news,v 1.20 2011/12/08 12:12:46 rrt Exp $ # mail.news: file(1) magic for mail and news # # Unfortunately, saved netnews also has From line added in some news software. #0 string From mail text -# There are tests to ascmagic.c to cope with mail and news. 
-0 string Relay-Version: old news text +0 string/t Relay-Version: old news text !:mime message/rfc822 -0 string #!\ rnews batched news text +0 string/t #!\ rnews batched news text !:mime message/rfc822 -0 string N#!\ rnews mailed, batched news text +0 string/t N#!\ rnews mailed, batched news text !:mime message/rfc822 -0 string Forward\ to mail forwarding text +0 string/t Forward\ to mail forwarding text !:mime message/rfc822 -0 string Pipe\ to mail piping text +0 string/t Pipe\ to mail piping text !:mime message/rfc822 -0 string Return-Path: smtp mail text +0 string/t Delivered-To: SMTP mail text !:mime message/rfc822 -0 string Path: news text +0 string/t Return-Path: SMTP mail text +!:mime message/rfc822 +0 string/t Path: news text !:mime message/news -0 string Xref: news text +0 string/t Xref: news text !:mime message/news -0 string From: news or mail text +0 string/t From: news or mail text !:mime message/rfc822 -0 string Article saved news text +0 string/t Article saved news text !:mime message/news -0 string BABYL Emacs RMAIL text -0 string Received: RFC 822 mail text +0 string/t BABYL Emacs RMAIL text +0 string/t Received: RFC 822 mail text !:mime message/rfc822 -0 string MIME-Version: MIME entity text -#0 string Content- MIME entity text +0 string/t MIME-Version: MIME entity text +#0 string/t Content- MIME entity text # TNEF files... 0 lelong 0x223E9F78 Transport Neutral Encapsulation Format @@ -51,3 +53,14 @@ #>4 leshort >0 (%d messages) #0 string \arena mappings via + the "thread.arena" mallctl. + - Add compile-time support for all TLS-related functionality via pthreads TSD. + This is mainly of interest for OS X, which does not support TLS, but has a + TSD implementation with similar performance. + - Override memalign() and valloc() if they are provided by the system. + - Add the "arenas.purge" mallctl, which can be used to synchronously purge all + dirty unused pages. + - Make cumulative heap profiling data optional, so that it is possible to + limit the amount of memory consumed by heap profiling data structures. + - Add per thread allocation counters that can be accessed via the + "thread.allocated" and "thread.deallocated" mallctls. + + Incompatible changes: + - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above). + - Increase default backtrace depth from 4 to 128 for heap profiling. + - Disable interval-based profile dumps by default. + + Bug fixes: + - Remove bad assertions in fork handler functions. These assertions could + cause aborts for some combinations of configure settings. + - Fix strerror_r() usage to deal with non-standard semantics in GNU libc. + - Fix leak context reporting. This bug tended to cause the number of contexts + to be underreported (though the reported number of objects and bytes were + correct). + - Fix a realloc() bug for large in-place growing reallocation. This bug could + cause memory corruption, but it was hard to trigger. + - Fix an allocation bug for small allocations that could be triggered if + multiple threads raced to create a new run of backing pages. + - Enhance the heap profiler to trigger samples based on usable size, rather + than request size. + - Fix a heap profiling bug due to sometimes losing track of requested object + size for sampled objects. + +* 1.0.3 (August 12, 2010) + + Bug fixes: + - Fix the libunwind-based implementation of stack backtracing (used for heap + profiling). This bug could cause zero-length backtraces to be reported. + - Add a missing mutex unlock in library initialization code. 
If multiple + threads raced to initialize malloc, some of them could end up permanently + blocked. + +* 1.0.2 (May 11, 2010) + + Bug fixes: + - Fix junk filling of large objects, which could cause memory corruption. + - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual + memory limits could cause swap file configuration to fail. Contributed by + Jordan DeLong. + +* 1.0.1 (April 14, 2010) + + Bug fixes: + - Fix compilation when --enable-fill is specified. + - Fix threads-related profiling bugs that affected accuracy and caused memory + to be leaked during thread exit. + - Fix dirty page purging race conditions that could cause crashes. + - Fix crash in tcache flushing code during thread destruction. + +* 1.0.0 (April 11, 2010) + + This release focuses on speed and run-time introspection. Numerous + algorithmic improvements make this release substantially faster than its + predecessors. + + New features: + - Implement autoconf-based configuration system. + - Add mallctl*(), for the purposes of introspection and run-time + configuration. + - Make it possible for the application to manually flush a thread's cache, via + the "tcache.flush" mallctl. + - Base maximum dirty page count on proportion of active memory. + - Compute various addtional run-time statistics, including per size class + statistics for large objects. + - Expose malloc_stats_print(), which can be called repeatedly by the + application. + - Simplify the malloc_message() signature to only take one string argument, + and incorporate an opaque data pointer argument for use by the application + in combination with malloc_stats_print(). + - Add support for allocation backed by one or more swap files, and allow the + application to disable over-commit if swap files are in use. + - Implement allocation profiling and leak checking. + + Removed features: + - Remove the dynamic arena rebalancing code, since thread-specific caching + reduces its utility. + + Bug fixes: + - Modify chunk allocation to work when address space layout randomization + (ASLR) is in use. + - Fix thread cleanup bugs related to TLS destruction. + - Handle 0-size allocation requests in posix_memalign(). + - Fix a chunk leak. The leaked chunks were never touched, so this impacted + virtual memory usage, but not physical memory usage. + +* linux_2008082[78]a (August 27/28, 2008) + + These snapshot releases are the simple result of incorporating Linux-specific + support into the FreeBSD malloc sources. 
+ +-------------------------------------------------------------------------------- +vim:filetype=text:textwidth=80 diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist new file mode 100644 index 000000000000..f3945de46f8c --- /dev/null +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -0,0 +1,23 @@ +$FreeBSD$ +.git +.gitignore +FREEBSD-* +INSTALL +Makefile* +README +autogen.sh +autom4te.cache/ +bin/ +config.* +configure* +doc/*.in +doc/*.xml +doc/*.xsl +doc/*.html +include/jemalloc/internal/jemalloc_internal.h.in +include/jemalloc/internal/size_classes.sh +include/jemalloc/jemalloc.h.in +include/jemalloc/jemalloc_defs.h.in +install-sh +src/zone.c +test/ diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs new file mode 100644 index 000000000000..d87d0fc68e96 --- /dev/null +++ b/contrib/jemalloc/FREEBSD-diffs @@ -0,0 +1,249 @@ +diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in +index 98d0ba4..23d2152 100644 +--- a/doc/jemalloc.xml.in ++++ b/doc/jemalloc.xml.in +@@ -51,12 +51,23 @@ + This manual describes jemalloc @jemalloc_version@. More information + can be found at the jemalloc website. ++ ++ The following configuration options are enabled in libc's built-in ++ jemalloc: , ++ , , ++ , , ++ , , ++ , , and ++ . Additionally, ++ is enabled in development versions of ++ FreeBSD (controlled by the MALLOC_PRODUCTION make ++ variable). + + + SYNOPSIS + + #include <stdlib.h> +-#include <jemalloc/jemalloc.h> ++#include <malloc_np.h> + + Standard API + +@@ -2080,4 +2091,16 @@ malloc_conf = "lg_chunk:24";]]> + The posix_memalign function conforms + to IEEE Std 1003.1-2001 (“POSIX.1”). + ++ ++ HISTORY ++ The malloc_usable_size and ++ posix_memalign functions first appeared in ++ FreeBSD 7.0. ++ ++ The aligned_alloc, ++ malloc_stats_print, ++ mallctl*, and ++ *allocm functions first appeared in ++ FreeBSD 10.0. 
++ + +diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in +index 905653a..b235a0d 100644 +--- a/include/jemalloc/internal/jemalloc_internal.h.in ++++ b/include/jemalloc/internal/jemalloc_internal.h.in +@@ -1,5 +1,8 @@ + #ifndef JEMALLOC_INTERNAL_H + #define JEMALLOC_INTERNAL_H ++#include "libc_private.h" ++#include "namespace.h" ++ + #include + #include + #include +@@ -35,6 +38,9 @@ + #include + #include + ++#include "un-namespace.h" ++#include "libc_private.h" ++ + #define JEMALLOC_NO_DEMANGLE + #include "../jemalloc@install_suffix@.h" + +diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h +index c46feee..d7133f4 100644 +--- a/include/jemalloc/internal/mutex.h ++++ b/include/jemalloc/internal/mutex.h +@@ -39,8 +39,6 @@ struct malloc_mutex_s { + + #ifdef JEMALLOC_LAZY_LOCK + extern bool isthreaded; +-#else +-# define isthreaded true + #endif + + bool malloc_mutex_init(malloc_mutex_t *mutex); +diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in +index f0581db..f26d8bc 100644 +--- a/include/jemalloc/jemalloc.h.in ++++ b/include/jemalloc/jemalloc.h.in +@@ -15,6 +15,7 @@ extern "C" { + #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" + + #include "jemalloc_defs@install_suffix@.h" ++#include "jemalloc_FreeBSD.h" + + #ifdef JEMALLOC_EXPERIMENTAL + #define ALLOCM_LG_ALIGN(la) (la) +diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h +new file mode 100644 +index 0000000..2c5797f +--- /dev/null ++++ b/include/jemalloc/jemalloc_FreeBSD.h +@@ -0,0 +1,76 @@ ++/* ++ * Override settings that were generated in jemalloc_defs.h as necessary. ++ */ ++ ++#undef JEMALLOC_OVERRIDE_VALLOC ++ ++#ifndef MALLOC_PRODUCTION ++#define JEMALLOC_DEBUG ++#endif ++ ++/* ++ * The following are architecture-dependent, so conditionally define them for ++ * each supported architecture. ++ */ ++#undef CPU_SPINWAIT ++#undef JEMALLOC_TLS_MODEL ++#undef STATIC_PAGE_SHIFT ++#undef LG_SIZEOF_PTR ++#undef LG_SIZEOF_INT ++#undef LG_SIZEOF_LONG ++#undef LG_SIZEOF_INTMAX_T ++ ++#ifdef __i386__ ++# define LG_SIZEOF_PTR 2 ++# define CPU_SPINWAIT __asm__ volatile("pause") ++# define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) ++#endif ++#ifdef __ia64__ ++# define LG_SIZEOF_PTR 3 ++#endif ++#ifdef __sparc64__ ++# define LG_SIZEOF_PTR 3 ++# define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) ++#endif ++#ifdef __amd64__ ++# define LG_SIZEOF_PTR 3 ++# define CPU_SPINWAIT __asm__ volatile("pause") ++# define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) ++#endif ++#ifdef __arm__ ++# define LG_SIZEOF_PTR 2 ++#endif ++#ifdef __mips__ ++# define LG_SIZEOF_PTR 2 ++#endif ++#ifdef __powerpc64__ ++# define LG_SIZEOF_PTR 3 ++#elif defined(__powerpc__) ++# define LG_SIZEOF_PTR 2 ++#endif ++ ++#ifndef JEMALLOC_TLS_MODEL ++# define JEMALLOC_TLS_MODEL /* Default. */ ++#endif ++#ifdef __clang__ ++# undef JEMALLOC_TLS_MODEL ++# define JEMALLOC_TLS_MODEL /* clang does not support tls_model yet. */ ++#endif ++ ++#define STATIC_PAGE_SHIFT PAGE_SHIFT ++#define LG_SIZEOF_INT 2 ++#define LG_SIZEOF_LONG LG_SIZEOF_PTR ++#define LG_SIZEOF_INTMAX_T 3 ++ ++/* Disable lazy-lock machinery, mangle isthreaded, and adjust its type. */ ++#undef JEMALLOC_LAZY_LOCK ++extern int __isthreaded; ++#define isthreaded ((bool)__isthreaded) ++ ++/* Mangle. 
*/ ++#define open _open ++#define read _read ++#define write _write ++#define close _close ++#define pthread_mutex_lock _pthread_mutex_lock ++#define pthread_mutex_unlock _pthread_mutex_unlock +diff --git a/src/jemalloc.c b/src/jemalloc.c +index 0decd8a..73fad29 100644 +--- a/src/jemalloc.c ++++ b/src/jemalloc.c +@@ -8,6 +8,9 @@ malloc_tsd_data(, arenas, arena_t *, NULL) + malloc_tsd_data(, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER) + ++const char *__malloc_options_1_0; ++__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); ++ + /* Runtime configuration options. */ + const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); + #ifdef JEMALLOC_DEBUG +@@ -401,7 +404,8 @@ malloc_conf_init(void) + #endif + ; + +- if ((opts = getenv(envname)) != NULL) { ++ if (issetugid() == 0 && (opts = getenv(envname)) != ++ NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the MALLOC_CONF environment +diff --git a/src/mutex.c b/src/mutex.c +index 4b8ce57..7be5fc9 100644 +--- a/src/mutex.c ++++ b/src/mutex.c +@@ -63,6 +63,17 @@ pthread_create(pthread_t *__restrict thread, + #ifdef JEMALLOC_MUTEX_INIT_CB + int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, + void *(calloc_cb)(size_t, size_t)); ++ ++__weak_reference(_pthread_mutex_init_calloc_cb_stub, ++ _pthread_mutex_init_calloc_cb); ++ ++int ++_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, ++ void *(calloc_cb)(size_t, size_t)) ++{ ++ ++ return (0); ++} + #endif + + bool +diff --git a/src/util.c b/src/util.c +index 2aab61f..8b05042 100644 +--- a/src/util.c ++++ b/src/util.c +@@ -60,6 +60,22 @@ wrtmessage(void *cbopaque, const char *s) + void (*je_malloc_message)(void *, const char *s) + JEMALLOC_ATTR(visibility("default")) = wrtmessage; + ++JEMALLOC_CATTR(visibility("hidden"), static) ++void ++wrtmessage_1_0(const char *s1, const char *s2, const char *s3, ++ const char *s4) ++{ ++ ++ wrtmessage(NULL, s1); ++ wrtmessage(NULL, s2); ++ wrtmessage(NULL, s3); ++ wrtmessage(NULL, s4); ++} ++ ++void (*__malloc_message_1_0)(const char *s1, const char *s2, const char *s3, ++ const char *s4) = wrtmessage_1_0; ++__sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0); ++ + /* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. diff --git a/contrib/jemalloc/FREEBSD-upgrade b/contrib/jemalloc/FREEBSD-upgrade new file mode 100755 index 000000000000..0f48ffeecd25 --- /dev/null +++ b/contrib/jemalloc/FREEBSD-upgrade @@ -0,0 +1,122 @@ +#!/bin/sh +# $FreeBSD$ +# +# Usage: cd /usr/src/contrib/jemalloc +# ./FREEBSD-upgrade [args] +# +# At least the following ports are required when importing jemalloc: +# - devel/autoconf +# - devel/git +# - devel/gmake +# - textproc/docbook-xsl +# +# The normal workflow for importing a new release is: +# +# cd /usr/src/contrib/jemalloc +# +# Merge local changes that were made since the previous import: +# +# ./FREEBSD-upgrade merge-changes +# ./FREEBSD-upgrade rediff +# +# Extract latest jemalloc release. +# +# ./FREEBSD-upgrade extract +# +# Fix patch conflicts as necessary, then regenerate diffs to update line +# offsets: +# +# ./FREEBSD-upgrade rediff +# ./FREEBSD-upgrade extract +# +# Do multiple buildworld/installworld rounds. 
If problems arise and patches +# are needed, edit the code in ${work} as necessary, then: +# +# ./FREEBSD-upgrade rediff +# ./FREEBSD-upgrade extract +# +# The rediff/extract order is important because rediff saves the local +# changes, then extract blows away the work tree and re-creates it with the +# diffs applied. +# +# Finally, to clean up: +# +# ./FREEBSD-upgrade clean + +set -e + +if [ ! -x "FREEBSD-upgrade" ] ; then + echo "Run from within src/contrib/jemalloc/" >&2 + exit 1 +fi + +src=`pwd` +workname="jemalloc.git" +work="${src}/../${workname}" # merge-changes expects ${workname} in "..". +changes="${src}/FREEBSD-changes" + +do_extract() { + local rev=$1 + # Clone. + rm -rf ${work} + git clone git://canonware.com/jemalloc.git ${work} + ( + cd ${work} + if [ "x${rev}" != "x" ] ; then + # Use optional rev argument to check out a revision other than HEAD on + # master. + git checkout ${rev} + fi + # Apply diffs before generating files. + patch -p1 < "${src}/FREEBSD-diffs" + find . -name '*.orig' -delete + # Generate various files. + ./autogen.sh --enable-cc-silence --enable-dss --enable-xmalloc \ + --enable-utrace --with-xslroot=/usr/local/share/xsl/docbook + gmake dist + ) +} + +do_diff() { + (cd ${work}; git add -A; git diff --cached) > FREEBSD-diffs +} + +command=$1 +shift +case "${command}" in + merge-changes) # Merge local changes that were made since the previous import. + rev=`cat VERSION |tr 'g' ' ' |awk '{print $2}'` + # Extract code corresponding to most recent import. + do_extract ${rev} + # Compute local differences to the upstream+patches and apply them. + ( + cd .. + diff -ru -X ${src}/FREEBSD-Xlist ${workname} jemalloc > ${changes} || true + ) + ( + cd ${work} + patch -p1 < ${changes} + find . -name '*.orig' -delete + ) + # Update diff. + do_diff + ;; + extract) # Extract upstream sources, apply patches, copy to contrib/jemalloc. + rev=$1 + do_extract ${rev} + # Delete existing files so that cruft doesn't silently remain. + rm -rf ChangeLog COPYING VERSION doc include src + # Copy files over. + tar cf - -C ${work} -X FREEBSD-Xlist . |tar xvf - + ;; + rediff) # Regenerate diffs based on working tree. + do_diff + ;; + clean) # Remove working tree and temporary files. 
+ rm -rf ${work} ${changes} + ;; + *) + echo "Unsupported command: \"${command}\"" >&2 + exit 1 + ;; +esac diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION new file mode 100644 index 000000000000..f751c8df6454 --- /dev/null +++ b/contrib/jemalloc/VERSION @@ -0,0 +1 @@ +1.0.0-266-gb57d3ec571c6551231be62b7bf92c084a8c8291c diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 new file mode 100644 index 000000000000..a8a8b772730c --- /dev/null +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -0,0 +1,1464 @@ +'\" t +.\" Title: JEMALLOC +.\" Author: Jason Evans +.\" Generator: DocBook XSL Stylesheets v1.76.1 +.\" Date: 04/17/2012 +.\" Manual: User Manual +.\" Source: jemalloc 1.0.0-266-gb57d3ec571c6551231be62b7bf92c084a8c8291c +.\" Language: English +.\" +.TH "JEMALLOC" "3" "04/17/2012" "jemalloc 1.0.0-266-gb57d3ec571" "User Manual" +.\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" ----------------------------------------------------------------- +.\" * set default formatting +.\" ----------------------------------------------------------------- +.\" disable hyphenation +.nh +.\" disable justification (adjust text to left margin only) +.ad l +.\" ----------------------------------------------------------------- +.\" * MAIN CONTENT STARTS HERE * +.\" ----------------------------------------------------------------- +.SH "NAME" +jemalloc \- general purpose memory allocation functions +.SH "LIBRARY" +.PP +This manual describes jemalloc 1\&.0\&.0\-266\-gb57d3ec571c6551231be62b7bf92c084a8c8291c\&. More information can be found at the +\m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. +.PP +The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: +\fB\-\-enable\-dss\fR, +\fB\-\-enable\-experimental\fR, +\fB\-\-enable\-fill\fR, +\fB\-\-enable\-lazy\-lock\fR, +\fB\-\-enable\-munmap\fR, +\fB\-\-enable\-stats\fR, +\fB\-\-enable\-tcache\fR, +\fB\-\-enable\-tls\fR, +\fB\-\-enable\-utrace\fR, and +\fB\-\-enable\-xmalloc\fR\&. Additionally, +\fB\-\-enable\-debug\fR +is enabled in development versions of FreeBSD (controlled by the +\fBMALLOC_PRODUCTION\fR +make variable)\&. 
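The configure options listed above can also be confirmed at run time through the "config.*" mallctls documented later in this page. As a minimal sketch — assuming FreeBSD's <malloc_np.h> header and the mallctl() interface shown in the SYNOPSIS below — an application might do:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc_np.h>	/* mallctl() and friends on FreeBSD */

int
main(void)
{
	const char *version;
	bool stats, tcache;
	size_t len;

	/* "version" is a read-only string mallctl. */
	len = sizeof(version);
	if (mallctl("version", &version, &len, NULL, 0) != 0) {
		fprintf(stderr, "mallctl(\"version\") failed\n");
		return (EXIT_FAILURE);
	}
	printf("jemalloc %s\n", version);

	/* The "config.*" mallctls report the build-time configuration. */
	len = sizeof(stats);
	(void)mallctl("config.stats", &stats, &len, NULL, 0);
	len = sizeof(tcache);
	(void)mallctl("config.tcache", &tcache, &len, NULL, 0);
	printf("stats: %s, tcache: %s\n", stats ? "yes" : "no",
	    tcache ? "yes" : "no");

	return (EXIT_SUCCESS);
}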
+.SH "SYNOPSIS" +.sp +.ft B +.nf +#include +#include +.fi +.ft +.SS "Standard API" +.HP \w'void\ *malloc('u +.BI "void *malloc(size_t\ " "size" ");" +.HP \w'void\ *calloc('u +.BI "void *calloc(size_t\ " "number" ", size_t\ " "size" ");" +.HP \w'int\ posix_memalign('u +.BI "int posix_memalign(void\ **" "ptr" ", size_t\ " "alignment" ", size_t\ " "size" ");" +.HP \w'void\ *aligned_alloc('u +.BI "void *aligned_alloc(size_t\ " "alignment" ", size_t\ " "size" ");" +.HP \w'void\ *realloc('u +.BI "void *realloc(void\ *" "ptr" ", size_t\ " "size" ");" +.HP \w'void\ free('u +.BI "void free(void\ *" "ptr" ");" +.SS "Non\-standard API" +.HP \w'size_t\ malloc_usable_size('u +.BI "size_t malloc_usable_size(const\ void\ *" "ptr" ");" +.HP \w'void\ malloc_stats_print('u +.BI "void malloc_stats_print(void\ " "(*write_cb)" "\ (void\ *,\ const\ char\ *), void\ *" "cbopaque" ", const\ char\ *" "opts" ");" +.HP \w'int\ mallctl('u +.BI "int mallctl(const\ char\ *" "name" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");" +.HP \w'int\ mallctlnametomib('u +.BI "int mallctlnametomib(const\ char\ *" "name" ", size_t\ *" "mibp" ", size_t\ *" "miblenp" ");" +.HP \w'int\ mallctlbymib('u +.BI "int mallctlbymib(const\ size_t\ *" "mib" ", size_t\ " "miblen" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");" +.HP \w'void\ (*malloc_message)('u +.BI "void (*malloc_message)(void\ *" "cbopaque" ", const\ char\ *" "s" ");" +.PP +const char *\fImalloc_conf\fR; +.SS "Experimental API" +.HP \w'int\ allocm('u +.BI "int allocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");" +.HP \w'int\ rallocm('u +.BI "int rallocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", size_t\ " "extra" ", int\ " "flags" ");" +.HP \w'int\ sallocm('u +.BI "int sallocm(const\ void\ *" "ptr" ", size_t\ *" "rsize" ", int\ " "flags" ");" +.HP \w'int\ dallocm('u +.BI "int dallocm(void\ *" "ptr" ", int\ " "flags" ");" +.HP \w'int\ nallocm('u +.BI "int nallocm(size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");" +.SH "DESCRIPTION" +.SS "Standard API" +.PP +The +\fBmalloc\fR\fB\fR +function allocates +\fIsize\fR +bytes of uninitialized memory\&. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object\&. +.PP +The +\fBcalloc\fR\fB\fR +function allocates space for +\fInumber\fR +objects, each +\fIsize\fR +bytes in length\&. The result is identical to calling +\fBmalloc\fR\fB\fR +with an argument of +\fInumber\fR +* +\fIsize\fR, with the exception that the allocated memory is explicitly initialized to zero bytes\&. +.PP +The +\fBposix_memalign\fR\fB\fR +function allocates +\fIsize\fR +bytes of memory such that the allocation\*(Aqs base address is an even multiple of +\fIalignment\fR, and returns the allocation in the value pointed to by +\fIptr\fR\&. The requested +\fIalignment\fR +must be a power of 2 at least as large as +sizeof(\fBvoid *\fR)\&. +.PP +The +\fBaligned_alloc\fR\fB\fR +function allocates +\fIsize\fR +bytes of memory such that the allocation\*(Aqs base address is an even multiple of +\fIalignment\fR\&. The requested +\fIalignment\fR +must be a power of 2\&. Behavior is undefined if +\fIsize\fR +is not an integral multiple of +\fIalignment\fR\&. +.PP +The +\fBrealloc\fR\fB\fR +function changes the size of the previously allocated memory referenced by +\fIptr\fR +to +\fIsize\fR +bytes\&. The contents of the memory are unchanged up to the lesser of the new and old sizes\&. 
If the new size is larger, the contents of the newly allocated portion of the memory are undefined\&. Upon success, the memory referenced by +\fIptr\fR +is freed and a pointer to the newly allocated memory is returned\&. Note that +\fBrealloc\fR\fB\fR +may move the memory allocation, resulting in a different return value than +\fIptr\fR\&. If +\fIptr\fR +is +\fBNULL\fR, the +\fBrealloc\fR\fB\fR +function behaves identically to +\fBmalloc\fR\fB\fR +for the specified size\&. +.PP +The +\fBfree\fR\fB\fR +function causes the allocated memory referenced by +\fIptr\fR +to be made available for future allocations\&. If +\fIptr\fR +is +\fBNULL\fR, no action occurs\&. +.SS "Non\-standard API" +.PP +The +\fBmalloc_usable_size\fR\fB\fR +function returns the usable size of the allocation pointed to by +\fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The +\fBmalloc_usable_size\fR\fB\fR +function is not a mechanism for in\-place +\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by +\fBmalloc_usable_size\fR\fB\fR +should not be depended on, since such behavior is entirely implementation\-dependent\&. +.PP +The +\fBmalloc_stats_print\fR\fB\fR +function writes human\-readable summary statistics via the +\fIwrite_cb\fR +callback function pointer and +\fIcbopaque\fR +data passed to +\fIwrite_cb\fR, or +\fBmalloc_message\fR\fB\fR +if +\fIwrite_cb\fR +is +\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the +\fIopts\fR +string\&. Note that +\fBmalloc_message\fR\fB\fR +uses the +\fBmallctl*\fR\fB\fR +functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If +\fB\-\-enable\-stats\fR +is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq and \(lql\(rq can be specified to omit per size class statistics for bins and large objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&. +.PP +The +\fBmallctl\fR\fB\fR +function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions\&. The period\-separated +\fIname\fR +argument specifies a location in a tree\-structured namespace; see the +MALLCTL NAMESPACE +section for documentation on the tree contents\&. To read a value, pass a pointer via +\fIoldp\fR +to adequate space to contain the value, and a pointer to its length via +\fIoldlenp\fR; otherwise pass +\fBNULL\fR +and +\fBNULL\fR\&. Similarly, to write a value, pass a pointer to the value via +\fInewp\fR, and its length via +\fInewlen\fR; otherwise pass +\fBNULL\fR +and +\fB0\fR\&. +.PP +The +\fBmallctlnametomib\fR\fB\fR +function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a \(lqManagement Information Base\(rq (MIB) that can be passed repeatedly to +\fBmallctlbymib\fR\fB\fR\&. 
Upon successful return from +\fBmallctlnametomib\fR\fB\fR, +\fImibp\fR +contains an array of +\fI*miblenp\fR +integers, where +\fI*miblenp\fR +is the lesser of the number of components in +\fIname\fR +and the input value of +\fI*miblenp\fR\&. Thus it is possible to pass a +\fI*miblenp\fR +that is smaller than the number of period\-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB\&. For name components that are integers (e\&.g\&. the 2 in +"arenas\&.bin\&.2\&.size"), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following: +.sp +.if n \{\ +.RS 4 +.\} +.nf +unsigned nbins, i; + +int mib[4]; +size_t len, miblen; + +len = sizeof(nbins); +mallctl("arenas\&.nbins", &nbins, &len, NULL, 0); + +miblen = 4; +mallnametomib("arenas\&.bin\&.0\&.size", mib, &miblen); +for (i = 0; i < nbins; i++) { + size_t bin_size; + + mib[2] = i; + len = sizeof(bin_size); + mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); + /* Do something with bin_size\&.\&.\&. */ +} +.fi +.if n \{\ +.RE +.\} +.SS "Experimental API" +.PP +The experimental API is subject to change or removal without regard for backward compatibility\&. If +\fB\-\-disable\-experimental\fR +is specified during configuration, the experimental API is omitted\&. +.PP +The +\fBallocm\fR\fB\fR, +\fBrallocm\fR\fB\fR, +\fBsallocm\fR\fB\fR, +\fBdallocm\fR\fB\fR, and +\fBnallocm\fR\fB\fR +functions all have a +\fIflags\fR +argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following: +.PP +\fBALLOCM_LG_ALIGN(\fR\fB\fIla\fR\fR\fB) \fR +.RS 4 +Align the memory allocation to start at an address that is a multiple of +(1 << \fIla\fR)\&. This macro does not validate that +\fIla\fR +is within the valid range\&. +.RE +.PP +\fBALLOCM_ALIGN(\fR\fB\fIa\fR\fR\fB) \fR +.RS 4 +Align the memory allocation to start at an address that is a multiple of +\fIa\fR, where +\fIa\fR +is a power of two\&. This macro does not validate that +\fIa\fR +is a power of 2\&. +.RE +.PP +\fBALLOCM_ZERO\fR +.RS 4 +Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this option is absent, newly allocated memory is uninitialized\&. +.RE +.PP +\fBALLOCM_NO_MOVE\fR +.RS 4 +For reallocation, fail rather than moving the object\&. This constraint can apply to both growth and shrinkage\&. +.RE +.PP +The +\fBallocm\fR\fB\fR +function allocates at least +\fIsize\fR +bytes of memory, sets +\fI*ptr\fR +to the base address of the allocation, and sets +\fI*rsize\fR +to the real size of the allocation if +\fIrsize\fR +is not +\fBNULL\fR\&. Behavior is undefined if +\fIsize\fR +is +\fB0\fR\&. +.PP +The +\fBrallocm\fR\fB\fR +function resizes the allocation at +\fI*ptr\fR +to be at least +\fIsize\fR +bytes, sets +\fI*ptr\fR +to the base address of the allocation if it moved, and sets +\fI*rsize\fR +to the real size of the allocation if +\fIrsize\fR +is not +\fBNULL\fR\&. If +\fIextra\fR +is non\-zero, an attempt is made to resize the allocation to be at least +\fIsize\fR + \fIextra\fR) +bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. 
Behavior is undefined if +\fIsize\fR +is +\fB0\fR, or if +(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&. +.PP +The +\fBsallocm\fR\fB\fR +function sets +\fI*rsize\fR +to the real size of the allocation\&. +.PP +The +\fBdallocm\fR\fB\fR +function causes the memory referenced by +\fIptr\fR +to be made available for future allocations\&. +.PP +The +\fBnallocm\fR\fB\fR +function allocates no memory, but it performs the same size computation as the +\fBallocm\fR\fB\fR +function, and if +\fIrsize\fR +is not +\fBNULL\fR +it sets +\fI*rsize\fR +to the real size of the allocation that would result from the equivalent +\fBallocm\fR\fB\fR +function call\&. Behavior is undefined if +\fIsize\fR +is +\fB0\fR\&. +.SH "TUNING" +.PP +Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&. +.PP +The string pointed to by the global variable +\fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named +/etc/malloc\&.conf, and the value of the environment variable +\fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. +.PP +An options string is a comma\-separated list of option:value pairs\&. There is one key corresponding to each +"opt\&.*" +mallctl (see the +MALLCTL NAMESPACE +section for options documentation)\&. For example, +abort:true,narenas:1 +sets the +"opt\&.abort" +and +"opt\&.narenas" +options\&. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string values\&. +.SH "IMPLEMENTATION NOTES" +.PP +Traditionally, allocators have used +\fBsbrk\fR(2) +to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory\&. If +\fB\-\-enable\-dss\fR +is specified during configuration, this allocator uses both +\fBsbrk\fR(2) +and +\fBmmap\fR(2), in that order of preference; otherwise only +\fBmmap\fR(2) +is used\&. +.PP +This allocator uses multiple arenas in order to reduce lock contention for threaded programs on multi\-processor systems\&. This works well with regard to threading scalability, but incurs some costs\&. There is a small fixed per\-arena overhead, and additionally, arenas manage memory completely independently of each other, which means a small fixed increase in overall memory fragmentation\&. These overheads are not generally an issue, given the number of arenas normally used\&. Note that using substantially more arenas than the default is not likely to improve performance, mainly due to reduced cache performance\&. However, it may make sense to reduce the number of arenas if an application does not make much use of the allocation functions\&. +.PP +In addition to multiple arenas, unless +\fB\-\-disable\-tcache\fR +is specified during configuration, this allocator supports thread\-specific caching for small and large objects, in order to make it possible to completely avoid synchronization for most allocation requests\&. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache\&. +.PP +Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. 
This alignment makes it possible to find metadata for user objects very quickly\&. +.PP +User objects are broken into three categories according to size: small, large, and huge\&. Small objects are smaller than one page\&. Large objects are smaller than the chunk size\&. Huge objects are a multiple of the chunk size\&. Small and large objects are managed by arenas; huge objects are managed separately in a single data structure that is shared by all threads\&. Huge objects are used by applications infrequently enough that this single data structure is not a scalability issue\&. +.PP +Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&. +.PP +Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least +sizeof(\fBdouble\fR)\&. All other small object size classes are multiples of the quantum, spaced such that internal fragmentation is limited to approximately 25% for all but the smallest size classes\&. Allocation requests that are larger than the maximum small size class, but small enough to fit in an arena\-managed chunk (see the +"opt\&.lg_chunk" +option), are rounded up to the nearest run size\&. Allocation requests that are too large to fit in an arena\-managed chunk are rounded up to the nearest multiple of the chunk size\&. +.PP +Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&. +.PP +Assuming 4 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in +Table 1\&. +.sp +.it 1 an-trap +.nr an-no-space-flag 1 +.nr an-break-flag 1 +.br +.B Table\ \&1.\ \&Size classes +.TS +allbox tab(:); +lB rB lB. +T{ +Category +T}:T{ +Spacing +T}:T{ +Size +T} +.T& +l r l +^ r l +^ r l +^ r l +^ r l +^ r l +^ r l +l r l +l r l. +T{ +Small +T}:T{ +lg +T}:T{ +[8] +T} +:T{ +16 +T}:T{ +[16, 32, 48, \&.\&.\&., 128] +T} +:T{ +32 +T}:T{ +[160, 192, 224, 256] +T} +:T{ +64 +T}:T{ +[320, 384, 448, 512] +T} +:T{ +128 +T}:T{ +[640, 768, 896, 1024] +T} +:T{ +256 +T}:T{ +[1280, 1536, 1792, 2048] +T} +:T{ +512 +T}:T{ +[2560, 3072, 3584] +T} +T{ +Large +T}:T{ +4 KiB +T}:T{ +[4 KiB, 8 KiB, 12 KiB, \&.\&.\&., 4072 KiB] +T} +T{ +Huge +T}:T{ +4 MiB +T}:T{ +[4 MiB, 8 MiB, 12 MiB, \&.\&.\&.] +T} +.TE +.sp 1 +.SH "MALLCTL NAMESPACE" +.PP +The following names are defined in the namespace accessible via the +\fBmallctl*\fR\fB\fR +functions\&. Value types are specified in parentheses, their readable/writable statuses are encoded as +rw, +r\-, +\-w, or +\-\-, and required build configuration flags follow, if any\&. A name element encoded as + +or + +indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection\&. In the case of +"stats\&.arenas\&.\&.*", + +equal to +"arenas\&.narenas" +can be used to access the summation of statistics from all arenas\&. 
Take special note of the +"epoch" +mallctl, which controls refreshing of cached dynamic statistics\&. +.PP +"version" (\fBconst char *\fR) r\- +.RS 4 +Return the jemalloc version string\&. +.RE +.PP +"epoch" (\fBuint64_t\fR) rw +.RS 4 +If a value is passed in, refresh the data from which the +\fBmallctl*\fR\fB\fR +functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&. +.RE +.PP +"config\&.debug" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-debug\fR +was specified during build configuration\&. +.RE +.PP +"config\&.dss" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-dss\fR +was specified during build configuration\&. +.RE +.PP +"config\&.fill" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-fill\fR +was specified during build configuration\&. +.RE +.PP +"config\&.lazy_lock" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-lazy\-lock\fR +was specified during build configuration\&. +.RE +.PP +"config\&.munmap" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-munmap\fR +was specified during build configuration\&. +.RE +.PP +"config\&.prof" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-prof\fR +was specified during build configuration\&. +.RE +.PP +"config\&.prof_libgcc" (\fBbool\fR) r\- +.RS 4 +\fB\-\-disable\-prof\-libgcc\fR +was not specified during build configuration\&. +.RE +.PP +"config\&.prof_libunwind" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-prof\-libunwind\fR +was specified during build configuration\&. +.RE +.PP +"config\&.stats" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-stats\fR +was specified during build configuration\&. +.RE +.PP +"config\&.tcache" (\fBbool\fR) r\- +.RS 4 +\fB\-\-disable\-tcache\fR +was not specified during build configuration\&. +.RE +.PP +"config\&.tls" (\fBbool\fR) r\- +.RS 4 +\fB\-\-disable\-tls\fR +was not specified during build configuration\&. +.RE +.PP +"config\&.utrace" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-utrace\fR +was specified during build configuration\&. +.RE +.PP +"config\&.valgrind" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-valgrind\fR +was specified during build configuration\&. +.RE +.PP +"config\&.xmalloc" (\fBbool\fR) r\- +.RS 4 +\fB\-\-enable\-xmalloc\fR +was specified during build configuration\&. +.RE +.PP +"opt\&.abort" (\fBbool\fR) r\- +.RS 4 +Abort\-on\-warning enabled/disabled\&. If true, most warnings are fatal\&. The process will call +\fBabort\fR(3) +in these cases\&. This option is disabled by default unless +\fB\-\-enable\-debug\fR +is specified during configuration, in which case it is enabled by default\&. +.RE +.PP +"opt\&.lg_chunk" (\fBsize_t\fR) r\- +.RS 4 +Virtual memory chunk size (log base 2)\&. The default chunk size is 4 MiB (2^22)\&. +.RE +.PP +"opt\&.narenas" (\fBsize_t\fR) r\- +.RS 4 +Maximum number of arenas to use\&. The default maximum number of arenas is four times the number of CPUs, or one if there is a single CPU\&. +.RE +.PP +"opt\&.lg_dirty_mult" (\fBssize_t\fR) r\- +.RS 4 +Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via +\fBmadvise\fR(2) +or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 32:1 (2^5:1); an option value of \-1 will disable dirty page purging\&. 
+.RE +.PP +"opt\&.stats_print" (\fBbool\fR) r\- +.RS 4 +Enable/disable statistics printing at exit\&. If enabled, the +\fBmalloc_stats_print\fR\fB\fR +function is called at program exit via an +\fBatexit\fR(3) +function\&. If +\fB\-\-enable\-stats\fR +is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&. +.RE +.PP +"opt\&.junk" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +.RS 4 +Junk filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to +0xa5\&. All deallocated memory will be initialized to +0x5a\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default unless +\fB\-\-enable\-debug\fR +is specified during configuration, in which case it is enabled by default\&. +.RE +.PP +"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR] +.RS 4 +Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the +"opt\&.junk" +option is enabled\&. This feature is of particular use in combination with +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0\&. +.RE +.PP +"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +.RS 4 +Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. Furthermore, if the +"opt\&.junk" +option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. +.RE +.PP +"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR] +.RS 4 +Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so +\fBrealloc\fR\fB\fR +and +\fBrallocm\fR\fB\fR +calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&. +.RE +.PP +"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR] +.RS 4 +Allocation tracing based on +\fButrace\fR(2) +enabled/disabled\&. This option is disabled by default\&. +.RE +.PP +"opt\&.valgrind" (\fBbool\fR) r\- [\fB\-\-enable\-valgrind\fR] +.RS 4 +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 +support enabled/disabled\&. If enabled, several other options are automatically modified during options processing to work well with Valgrind: +"opt\&.junk" +and +"opt\&.zero" +are set to false, +"opt\&.quarantine" +is set to 16 MiB, and +"opt\&.redzone" +is set to true\&. This option is disabled by default\&. 
+.RE +.PP +"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR] +.RS 4 +Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on +\fBSTDERR_FILENO\fR +and cause the program to drop core (using +\fBabort\fR(3))\&. If an application is designed to depend on this behavior, set the option at compile time by including the following in the source code: +.sp +.if n \{\ +.RS 4 +.\} +.nf +malloc_conf = "xmalloc:true"; +.fi +.if n \{\ +.RE +.\} +.sp +This option is disabled by default\&. +.RE +.PP +"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR] +.RS 4 +Thread\-specific caching enabled/disabled\&. When there are multiple threads, each thread uses a thread\-specific cache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the +"opt\&.lg_tcache_max" +option for related tuning information\&. This option is enabled by default\&. +.RE +.PP +"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +.RS 4 +Maximum size class (log base 2) to cache in the thread\-specific cache\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. +.RE +.PP +"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity, and use an +\fBatexit\fR(3) +function to dump final memory usage to a file named according to the pattern +\&.\&.\&.f\&.heap, where + +is controlled by the +"opt\&.prof_prefix" +option\&. See the +"opt\&.prof_active" +option for on\-the\-fly activation/deactivation\&. See the +"opt\&.lg_prof_sample" +option for probabilistic sampling control\&. See the +"opt\&.prof_accum" +option for control of cumulative sample reporting\&. See the +"opt\&.lg_prof_interval" +option for information on interval\-triggered profile dumping, and the +"opt\&.prof_gdump" +option for information on high\-water\-triggered profile dumping\&. Profile output is compatible with the included +\fBpprof\fR +Perl script, which originates from the +\m[blue]\fBgoogle\-perftools package\fR\m[]\&\s-2\u[3]\d\s+2\&. +.RE +.PP +"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Filename prefix for profile dumps\&. If the prefix is set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled)\&. The default prefix is +jeprof\&. +.RE +.PP +"opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the +"opt\&.prof" +option) but inactive, then toggle profiling at any time during program execution with the +"prof\&.active" +mallctl\&. This option is enabled by default\&. +.RE +.PP +"opt\&.lg_prof_sample" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 1 (2^0) (i\&.e\&. all allocations are sampled)\&. 
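+For example, a hypothetical startup setting that enables profiling with a sparser sample interval, assuming a build with \fB\-\-enable\-prof\fR (values are illustrative): +.sp +.if n \{\ +.RS 4 +.\} +.nf +malloc_conf = "prof:true,lg_prof_sample:19"; +.fi +.if n \{\ +.RE +.\}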
+.RE +.PP +"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is enabled by default\&. +.RE +.PP +"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Average interval (log base 2) between memory profile dumps, as measured in bytes of allocation activity\&. The actual interval between dumps may be sporadic because decentralized allocation counters are used to avoid synchronization bottlenecks\&. Profiles are dumped to files named according to the pattern +\&.\&.\&.i\&.heap, where + +is controlled by the +"opt\&.prof_prefix" +option\&. By default, interval\-triggered profile dumping is disabled (encoded as \-1)\&. +.RE +.PP +"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. Profiles are dumped to files named according to the pattern +\&.\&.\&.u\&.heap, where + +is controlled by the +"opt\&.prof_prefix" +option\&. This option is disabled by default\&. +.RE +.PP +"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Leak reporting enabled/disabled\&. If enabled, use an +\fBatexit\fR(3) +function to report memory leaks detected by allocation sampling\&. See the +"opt\&.prof" +option for information on analyzing heap profile output\&. This option is disabled by default\&. +.RE +.PP +"thread\&.arena" (\fBunsigned\fR) rw +.RS 4 +Get or set the arena associated with the calling thread\&. The arena index must be less than the maximum number of arenas (see the +"arenas\&.narenas" +mallctl)\&. If the specified arena was not initialized beforehand (see the +"arenas\&.initialized" +mallctl), it will be automatically initialized as a side effect of calling this interface\&. +.RE +.PP +"thread\&.allocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Get the total number of bytes ever allocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. +.RE +.PP +"thread\&.allocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Get a pointer to the value that is returned by the +"thread\&.allocated" +mallctl\&. This is useful for avoiding the overhead of repeated +\fBmallctl*\fR\fB\fR +calls\&. +.RE +.PP +"thread\&.deallocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Get the total number of bytes ever deallocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&. +.RE +.PP +"thread\&.deallocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Get a pointer to the value that is returned by the +"thread\&.deallocated" +mallctl\&. This is useful for avoiding the overhead of repeated +\fBmallctl*\fR\fB\fR +calls\&. +.RE +.PP +"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR] +.RS 4 +Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see +"thread\&.tcache\&.flush")\&.
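+For example, a thread that is about to block for a long time could disable (and thereby flush) its own cache; a minimal sketch using the +\fBmallctl\fR\fB\fR +interface, with error checking omitted: +.sp +.if n \{\ +.RS 4 +.\} +.nf +bool off = false; +mallctl("thread.tcache.enabled", NULL, NULL, &off, sizeof(off)); +.fi +.if n \{\ +.RE +.\}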
+.RE +.PP +"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR] +.RS 4 +Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&. +.RE +.PP +"arenas\&.narenas" (\fBunsigned\fR) r\- +.RS 4 +Maximum number of arenas\&. +.RE +.PP +"arenas\&.initialized" (\fBbool *\fR) r\- +.RS 4 +An array of +"arenas\&.narenas" +booleans\&. Each boolean indicates whether the corresponding arena is initialized\&. +.RE +.PP +"arenas\&.quantum" (\fBsize_t\fR) r\- +.RS 4 +Quantum size\&. +.RE +.PP +"arenas\&.page" (\fBsize_t\fR) r\- +.RS 4 +Page size\&. +.RE +.PP +"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR] +.RS 4 +Maximum thread\-cached size class\&. +.RE +.PP +"arenas\&.nbins" (\fBunsigned\fR) r\- +.RS 4 +Number of bin size classes\&. +.RE +.PP +"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR] +.RS 4 +Total number of thread cache bin size classes\&. +.RE +.PP +"arenas\&.bin\&.\&.size" (\fBsize_t\fR) r\- +.RS 4 +Maximum size supported by size class\&. +.RE +.PP +"arenas\&.bin\&.\&.nregs" (\fBuint32_t\fR) r\- +.RS 4 +Number of regions per page run\&. +.RE +.PP +"arenas\&.bin\&.\&.run_size" (\fBsize_t\fR) r\- +.RS 4 +Number of bytes per page run\&. +.RE +.PP +"arenas\&.nlruns" (\fBsize_t\fR) r\- +.RS 4 +Total number of large size classes\&. +.RE +.PP +"arenas\&.lrun\&.\&.size" (\fBsize_t\fR) r\- +.RS 4 +Maximum size supported by this large size class\&. +.RE +.PP +"arenas\&.purge" (\fBunsigned\fR) \-w +.RS 4 +Purge unused dirty pages for the specified arena, or for all arenas if none is specified\&. +.RE +.PP +"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] +.RS 4 +Control whether sampling is currently active\&. See the +"opt\&.prof_active" +option for additional information\&. +.RE +.PP +"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR] +.RS 4 +Dump a memory profile to the specified file, or if NULL is specified, to a file according to the pattern +\&.\&.\&.m\&.heap, where + +is controlled by the +"opt\&.prof_prefix" +option\&. +.RE +.PP +"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR] +.RS 4 +Average number of bytes allocated between interval\-based profile dumps\&. See the +"opt\&.lg_prof_interval" +option for additional information\&. +.RE +.PP +"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up to the nearest multiple of the chunk size when computing its contribution to the counter\&. Note that the +"epoch" +mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&. +.RE +.PP +"stats\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Total number of bytes allocated by the application\&.
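+For example, an up\-to\-date value can be read by first advancing the +"epoch" +mallctl described above; a minimal sketch, with error checking omitted: +.sp +.if n \{\ +.RS 4 +.\} +.nf +uint64_t epoch = 1; +size_t sz = sizeof(epoch); +mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch)); +size_t allocated; +sz = sizeof(allocated); +mallctl("stats.allocated", &allocated, &sz, NULL, 0); +.fi +.if n \{\ +.RE +.\}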
+.RE +.PP +"stats\&.active" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Total number of bytes in active pages allocated by the application\&. This is a multiple of the page size, and greater than or equal to +"stats\&.allocated"\&. +.RE +.PP +"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Total number of bytes in chunks mapped on behalf of the application\&. This is a multiple of the chunk size, and is at least as large as +"stats\&.active"\&. This does not include inactive chunks embedded in the DSS\&. +.RE +.PP +"stats\&.chunks\&.current" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks embedded in the DSS\&. +.RE +.PP +"stats\&.chunks\&.total" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of chunks allocated\&. +.RE +.PP +"stats\&.chunks\&.high" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Maximum number of active chunks at any time thus far\&. +.RE +.PP +"stats\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of bytes currently allocated by huge objects\&. +.RE +.PP +"stats\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of huge allocation requests\&. +.RE +.PP +"stats\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of huge deallocation requests\&. +.RE +.PP +"stats\&.arenas\&.\&.nthreads" (\fBunsigned\fR) r\- +.RS 4 +Number of threads currently assigned to arena\&. +.RE +.PP +"stats\&.arenas\&.\&.pactive" (\fBsize_t\fR) r\- +.RS 4 +Number of pages in active runs\&. +.RE +.PP +"stats\&.arenas\&.\&.pdirty" (\fBsize_t\fR) r\- +.RS 4 +Number of pages within unused runs that are potentially dirty, and for which +\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR +or similar has not been called\&. +.RE +.PP +"stats\&.arenas\&.\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of mapped bytes\&. +.RE +.PP +"stats\&.arenas\&.\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of dirty page purge sweeps performed\&. +.RE +.PP +"stats\&.arenas\&.\&.nmadvise" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of +\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR +or similar calls made to purge dirty pages\&. +.RE +.PP +"stats\&.arenas\&.\&.npurged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of pages purged\&. +.RE +.PP +"stats\&.arenas\&.\&.small\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of bytes currently allocated by small objects\&. +.RE +.PP +"stats\&.arenas\&.\&.small\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of allocation requests served by small bins\&. +.RE +.PP +"stats\&.arenas\&.\&.small\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of small objects returned to bins\&. +.RE +.PP +"stats\&.arenas\&.\&.small\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of small allocation requests\&. +.RE +.PP +"stats\&.arenas\&.\&.large\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of bytes currently allocated by large objects\&. +.RE +.PP +"stats\&.arenas\&.\&.large\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of large allocation requests served directly by the arena\&. 
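+Per\-arena statistics such as this one can also be read through the MIB interface, so that the arena index is substituted without repeated name lookups; an illustrative sketch, with error checking omitted: +.sp +.if n \{\ +.RS 4 +.\} +.nf +size_t mib[5], miblen = 5, sz; +uint64_t nmalloc; +mallctlnametomib("stats.arenas.0.large.nmalloc", mib, &miblen); +mib[2] = 1; /* Arena index to query. */ +sz = sizeof(nmalloc); +mallctlbymib(mib, miblen, &nmalloc, &sz, NULL, 0); +.fi +.if n \{\ +.RE +.\}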
+.RE +.PP +"stats\&.arenas\&.\&.large\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of large deallocation requests served directly by the arena\&. +.RE +.PP +"stats\&.arenas\&.\&.large\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of large allocation requests\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Current number of bytes allocated by bin\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of allocations served by bin\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of allocations returned to bin\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of allocation requests\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +.RS 4 +Cumulative number of tcache fills\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.nflushes" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR] +.RS 4 +Cumulative number of tcache flushes\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.nruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of runs created\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.nreruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of times the current run from which to allocate changed\&. +.RE +.PP +"stats\&.arenas\&.\&.bins\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Current number of runs\&. +.RE +.PP +"stats\&.arenas\&.\&.lruns\&.\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of allocation requests for this size class served directly by the arena\&. +.RE +.PP +"stats\&.arenas\&.\&.lruns\&.\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of deallocation requests for this size class served directly by the arena\&. +.RE +.PP +"stats\&.arenas\&.\&.lruns\&.\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Cumulative number of allocation requests for this size class\&. +.RE +.PP +"stats\&.arenas\&.\&.lruns\&.\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Current number of runs for this size class\&. +.RE +.SH "DEBUGGING MALLOC PROBLEMS" +.PP +When debugging, it is a good idea to configure/build jemalloc with the +\fB\-\-enable\-debug\fR +and +\fB\-\-enable\-fill\fR +options, and recompile the program with suitable options and symbols for debugger support\&. When so configured, jemalloc incorporates a wide variety of run\-time assertions that catch application errors such as double\-free, write\-after\-free, etc\&. +.PP +Programs often accidentally depend on \(lquninitialized\(rq memory actually being filled with zero bytes\&. Junk filling (see the +"opt\&.junk" +option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps\&. Conversely, zero filling (see the +"opt\&.zero" +option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&. +.PP +This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. 
However, jemalloc does integrate with the most excellent +\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2 +tool if the +\fB\-\-enable\-valgrind\fR +configuration option is enabled and the +"opt\&.valgrind" +option is enabled\&. +.SH "DIAGNOSTIC MESSAGES" +.PP +If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor +\fBSTDERR_FILENO\fR\&. Errors will result in the process dumping core\&. If the +"opt\&.abort" +option is set, most warnings are treated as errors\&. +.PP +The +\fImalloc_message\fR +variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the +\fBSTDERR_FILENO\fR +file descriptor is not suitable for this\&. +\fBmalloc_message\fR\fB\fR +takes the +\fIcbopaque\fR +pointer argument that is +\fBNULL\fR +unless overridden by the arguments in a call to +\fBmalloc_stats_print\fR\fB\fR, followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&. +.PP +All messages are prefixed by \(lq:\(rq\&. +.SH "RETURN VALUES" +.SS "Standard API" +.PP +The +\fBmalloc\fR\fB\fR +and +\fBcalloc\fR\fB\fR +functions return a pointer to the allocated memory if successful; otherwise a +\fBNULL\fR +pointer is returned and +\fIerrno\fR +is set to +ENOMEM\&. +.PP +The +\fBposix_memalign\fR\fB\fR +function returns the value 0 if successful; otherwise it returns an error value\&. The +\fBposix_memalign\fR\fB\fR +function will fail if: +.PP +EINVAL +.RS 4 +The +\fIalignment\fR +parameter is not a power of 2 at least as large as +sizeof(\fBvoid *\fR)\&. +.RE +.PP +ENOMEM +.RS 4 +Memory allocation error\&. +.RE +.PP +The +\fBaligned_alloc\fR\fB\fR +function returns a pointer to the allocated memory if successful; otherwise a +\fBNULL\fR +pointer is returned and +\fIerrno\fR +is set\&. The +\fBaligned_alloc\fR\fB\fR +function will fail if: +.PP +EINVAL +.RS 4 +The +\fIalignment\fR +parameter is not a power of 2\&. +.RE +.PP +ENOMEM +.RS 4 +Memory allocation error\&. +.RE +.PP +The +\fBrealloc\fR\fB\fR +function returns a pointer, possibly identical to +\fIptr\fR, to the allocated memory if successful; otherwise a +\fBNULL\fR +pointer is returned, and +\fIerrno\fR +is set to +ENOMEM +if the error was the result of an allocation failure\&. The +\fBrealloc\fR\fB\fR +function always leaves the original buffer intact when an error occurs\&. +.PP +The +\fBfree\fR\fB\fR +function returns no value\&. +.SS "Non\-standard API" +.PP +The +\fBmalloc_usable_size\fR\fB\fR +function returns the usable size of the allocation pointed to by +\fIptr\fR\&. +.PP +The +\fBmallctl\fR\fB\fR, +\fBmallctlnametomib\fR\fB\fR, and +\fBmallctlbymib\fR\fB\fR +functions return 0 on success; otherwise they return an error value\&. The functions will fail if: +.PP +EINVAL +.RS 4 +\fInewp\fR +is not +\fBNULL\fR, and +\fInewlen\fR +is too large or too small\&. Alternatively, +\fI*oldlenp\fR +is too large or too small; in this case as much data as possible are read despite the error\&. +.RE +.PP +ENOMEM +.RS 4 +\fI*oldlenp\fR +is too short to hold the requested value\&. +.RE +.PP +ENOENT +.RS 4 +\fIname\fR +or +\fImib\fR +specifies an unknown/invalid value\&. +.RE +.PP +EPERM +.RS 4 +Attempt to read or write void value, or attempt to write read\-only value\&. +.RE +.PP +EAGAIN +.RS 4 +A memory allocation failure occurred\&. 
+.RE +.PP +EFAULT +.RS 4 +An interface with side effects failed in some way not directly related to +\fBmallctl*\fR\fB\fR +read/write processing\&. +.RE +.SS "Experimental API" +.PP +The +\fBallocm\fR\fB\fR, +\fBrallocm\fR\fB\fR, +\fBsallocm\fR\fB\fR, +\fBdallocm\fR\fB\fR, and +\fBnallocm\fR\fB\fR +functions return +\fBALLOCM_SUCCESS\fR +on success; otherwise they return an error value\&. The +\fBallocm\fR\fB\fR, +\fBrallocm\fR\fB\fR, and +\fBnallocm\fR\fB\fR +functions will fail if: +.PP +ALLOCM_ERR_OOM +.RS 4 +Out of memory\&. Insufficient contiguous memory was available to service the allocation request\&. The +\fBallocm\fR\fB\fR +function additionally sets +\fI*ptr\fR +to +\fBNULL\fR, whereas the +\fBrallocm\fR\fB\fR +function leaves +\fB*ptr\fR +unmodified\&. +.RE +The +\fBrallocm\fR\fB\fR +function will also fail if: +.PP +ALLOCM_ERR_NOT_MOVED +.RS 4 +\fBALLOCM_NO_MOVE\fR +was specified, but the reallocation request could not be serviced without moving the object\&. +.RE +.SH "ENVIRONMENT" +.PP +The following environment variable affects the execution of the allocation functions: +.PP +\fBMALLOC_CONF\fR +.RS 4 +If the environment variable +\fBMALLOC_CONF\fR +is set, the characters it contains will be interpreted as options\&. +.RE +.SH "EXAMPLES" +.PP +To dump core whenever a problem occurs: +.sp +.if n \{\ +.RS 4 +.\} +.nf +ln \-s \*(Aqabort:true\*(Aq /etc/malloc\&.conf +.fi +.if n \{\ +.RE +.\} +.PP +To specify in the source a chunk size that is 16 MiB: +.sp +.if n \{\ +.RS 4 +.\} +.nf +malloc_conf = "lg_chunk:24"; +.fi +.if n \{\ +.RE +.\} +.SH "SEE ALSO" +.PP +\fBmadvise\fR(2), +\fBmmap\fR(2), +\fBsbrk\fR(2), +\fButrace\fR(2), +\fBalloca\fR(3), +\fBatexit\fR(3), +\fBgetpagesize\fR(3) +.SH "STANDARDS" +.PP +The +\fBmalloc\fR\fB\fR, +\fBcalloc\fR\fB\fR, +\fBrealloc\fR\fB\fR, and +\fBfree\fR\fB\fR +functions conform to ISO/IEC 9899:1990 (\(lqISO C90\(rq)\&. +.PP +The +\fBposix_memalign\fR\fB\fR +function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&. +.SH "HISTORY" +.PP +The +\fBmalloc_usable_size\fR\fB\fR +and +\fBposix_memalign\fR\fB\fR +functions first appeared in FreeBSD 7\&.0\&. +.PP +The +\fBaligned_alloc\fR\fB\fR, +\fBmalloc_stats_print\fR\fB\fR, +\fBmallctl*\fR\fB\fR, and +\fB*allocm\fR\fB\fR +functions first appeared in FreeBSD 10\&.0\&. +.SH "AUTHOR" +.PP +\fBJason Evans\fR +.RS 4 +.RE +.SH "NOTES" +.IP " 1." 4 +jemalloc website +.RS 4 +\%http://www.canonware.com/jemalloc/ +.RE +.IP " 2." 4 +Valgrind +.RS 4 +\%http://valgrind.org/ +.RE +.IP " 3." 4 +google-perftools package +.RS 4 +\%http://code.google.com/p/google-perftools/ +.RE diff --git a/contrib/jemalloc/include/jemalloc/internal/arena.h b/contrib/jemalloc/include/jemalloc/internal/arena.h new file mode 100644 index 000000000000..3790818c417d --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/arena.h @@ -0,0 +1,685 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized + * as small as possible such that this setting is still honored, without + * violating other constraints. The goal is to make runs as small as possible + * without exceeding a per run external fragmentation threshold. + * + * We use binary fixed point math for overhead computations, where the binary + * point is implicitly RUN_BFP bits to the left.
+ * + * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be + * honored for some/all object sizes, since when heap profiling is enabled + * there is one pointer of header overhead per object (plus a constant). This + * constraint is relaxed (ignored) for runs that are so small that the + * per-region overhead is greater than: + * + * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)) + */ +#define RUN_BFP 12 +/* \/ Implicit binary fixed point. */ +#define RUN_MAX_OVRHD 0x0000003dU +#define RUN_MAX_OVRHD_RELAX 0x00001800U + +/* Maximum number of regions in one run. */ +#define LG_RUN_MAXREGS 11 +#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) + +/* + * Minimum redzone size. Redzones may be larger than this if necessary to + * preserve region alignment. + */ +#define REDZONE_MINSIZE 16 + +/* + * The minimum ratio of active:dirty pages per arena is computed as: + * + * (nactive >> opt_lg_dirty_mult) >= ndirty + * + * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 + * times as many active pages as dirty pages. + */ +#define LG_DIRTY_MULT_DEFAULT 5 + +typedef struct arena_chunk_map_s arena_chunk_map_t; +typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_run_s arena_run_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_s { +#ifndef JEMALLOC_PROF + /* + * Overlay prof_ctx in order to allow it to be referenced by dead code. + * Such antics aren't warranted for per arena data structures, but + * chunk map overhead accounts for a percentage of memory, rather than + * being just a fixed cost. + */ + union { +#endif + union { + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail_{clean,dirty} trees. + * 2) arena_run_t conceptually uses this linkage for in-use + * non-full runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_t) rb_link; + /* + * List of runs currently in purgatory. arena_chunk_purge() + * temporarily allocates runs that contain dirty pages while + * purging, so that other threads cannot use the runs while the + * purging thread is operating without the arena lock held. + */ + ql_elm(arena_chunk_map_t) ql_link; + } u; + + /* Profile counters, used for large object runs. */ + prof_ctx_t *prof_ctx; +#ifndef JEMALLOC_PROF + }; /* union { ... }; */ +#endif + + /* + * Run address (or size) and various flags are stored together. The bit + * layout looks like (assuming 32-bit system): + * + * ???????? ???????? ????---- ----dula + * + * ? : Unallocated: Run address for first/last pages, unset for internal + * pages. + * Small: Run page offset. + * Large: Run size for first page, unset for trailing pages. + * - : Unused. + * d : dirty? + * u : unzeroed? + * l : large? + * a : allocated? + * + * Following are example bit patterns for the three types of runs. 
+ * + * p : run page offset + * s : run size + * c : (binind+1) for size class (used only if prof_promote is true) + * x : don't care + * - : 0 + * + : 1 + * [DULA] : bit set + * [dula] : bit unset + * + * Unallocated (clean): + * ssssssss ssssssss ssss---- ----du-a + * xxxxxxxx xxxxxxxx xxxx---- -----Uxx + * ssssssss ssssssss ssss---- ----dU-a + * + * Unallocated (dirty): + * ssssssss ssssssss ssss---- ----D--a + * xxxxxxxx xxxxxxxx xxxx---- ----xxxx + * ssssssss ssssssss ssss---- ----D--a + * + * Small: + * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp pppp---- -------A + * pppppppp pppppppp pppp---- ----d--A + * + * Large: + * ssssssss ssssssss ssss---- ----D-LA + * xxxxxxxx xxxxxxxx xxxx---- ----xxxx + * -------- -------- -------- ----D-LA + * + * Large (sampled, size <= PAGE): + * ssssssss ssssssss sssscccc ccccD-LA + * + * Large (not sampled, size == PAGE): + * ssssssss ssssssss ssss---- ----D-LA + */ + size_t bits; +#define CHUNK_MAP_CLASS_SHIFT 4 +#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_DIRTY ((size_t)0x8U) +#define CHUNK_MAP_UNZEROED ((size_t)0x4U) +#define CHUNK_MAP_LARGE ((size_t)0x2U) +#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) +#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED +}; +typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; + +/* Arena chunk header. */ +struct arena_chunk_s { + /* Arena that owns the chunk. */ + arena_t *arena; + + /* Linkage for the arena's chunks_dirty list. */ + ql_elm(arena_chunk_t) link_dirty; + + /* + * True if the chunk is currently in the chunks_dirty list, due to + * having at some point contained one or more dirty pages. Removal + * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible. + */ + bool dirtied; + + /* Number of dirty pages. */ + size_t ndirty; + + /* + * Map of pages within chunk that keeps track of free/large/small. The + * first map_bias entries are omitted, since the chunk header does not + * need to be tracked in the map. This omission saves a header page + * for common chunk sizes (e.g. 4 MiB). + */ + arena_chunk_map_t map[1]; /* Dynamically sized. */ +}; +typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; + +struct arena_run_s { + /* Bin this run is associated with. */ + arena_bin_t *bin; + + /* Index of next region that has never been allocated, or nregs. */ + uint32_t nextind; + + /* Number of free regions in run. */ + unsigned nfree; +}; + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each run has the following layout: + * + * /--------------------\ + * | arena_run_t header | + * | ... | + * bitmap_offset | bitmap | + * | ... | + * ctx0_offset | ctx map | + * | ... | + * |--------------------| + * | redzone | + * reg0_offset | region 0 | + * | redzone | + * |--------------------| \ + * | redzone | | + * | region 1 | > reg_interval + * | redzone | / + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | redzone | + * | region nregs-1 | + * | redzone | + * |--------------------| + * | alignment pad? | + * \--------------------/ + * + * reg_interval has at least the same minimum alignment as reg_size; this + * preserves the alignment constraint that sa2u() depends on. 
Alignment pad is + * either 0 or redzone_size; it is present only if needed to align reg0_offset. + */ +struct arena_bin_info_s { + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Redzone size. */ + size_t redzone_size; + + /* Interval between regions (reg_size + (redzone_size << 1)). */ + size_t reg_interval; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + + /* + * Offset of first bitmap_t element in a run header for this bin's size + * class. + */ + uint32_t bitmap_offset; + + /* + * Metadata used to manipulate bitmaps for runs associated with this + * bin. + */ + bitmap_info_t bitmap_info; + + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (config_prof == false || opt_prof == false). + */ + uint32_t ctx0_offset; + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; +}; + +struct arena_bin_s { + /* + * All operations on runcur, runs, and stats require that lock be + * locked. Run allocation/deallocation are protected by the arena lock, + * which may be acquired while holding one or more bin locks, but not + * vise versa. + */ + malloc_mutex_t lock; + + /* + * Current run being used to service allocations of this bin's size + * class. + */ + arena_run_t *runcur; + + /* + * Tree of non-full runs. This tree is used when looking for an + * existing run when runcur is no longer usable. We choose the + * non-full run that is lowest in memory; this policy tends to keep + * objects packed well, and it can also help reduce the number of + * almost-empty chunks. + */ + arena_run_tree_t runs; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +struct arena_s { + /* This arena's index within the arenas array. */ + unsigned ind; + + /* + * Number of threads currently assigned to this arena. This field is + * protected by arenas_lock. + */ + unsigned nthreads; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread asssignment (modifies nthreads) is protected by + * arenas_lock. + * 2) Bin-related operations are protected by bin locks. + * 3) Chunk- and run-related operations are protected by this mutex. + */ + malloc_mutex_t lock; + + arena_stats_t stats; + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit. + */ + ql_head(tcache_t) tcache_ql; + + uint64_t prof_accumbytes; + + /* List of dirty-page-containing chunks this arena manages. */ + ql_head(arena_chunk_t) chunks_dirty; + + /* + * In order to avoid rapid chunk allocation/deallocation when an arena + * oscillates right on the cusp of needing a new chunk, cache the most + * recently freed chunk. The spare is left in the arena's chunk trees + * until it is deleted. + * + * There is one spare chunk per arena, rather than one spare total, in + * order to avoid interactions between multiple threads that could make + * a single spare inadequate. + */ + arena_chunk_t *spare; + + /* Number of pages in active runs. */ + size_t nactive; + + /* + * Current count of pages within unused runs that are potentially + * dirty, and for which madvise(... MADV_DONTNEED) has not been called. + * By tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* + * Approximate number of pages being purged. 
It is possible for + * multiple threads to purge dirty pages concurrently, and they use + * npurgatory to indicate the total number of pages all threads are + * attempting to purge. + */ + size_t npurgatory; + + /* + * Size/address-ordered trees of this arena's available runs. The trees + * are used for first-best-fit run allocation. The dirty tree contains + * runs with dirty pages (i.e. very likely to have been touched and + * therefore have associated physical pages), whereas the clean tree + * contains runs with pages that either have no associated physical + * pages, or have pages that the kernel may recycle at any time due to + * previous madvise(2) calls. The dirty tree is used in preference to + * the clean tree for allocations, because using dirty pages reduces + * the amount of dirty purging necessary to keep the active:dirty page + * ratio below the purge threshold. + */ + arena_avail_tree_t runs_avail_clean; + arena_avail_tree_t runs_avail_dirty; + + /* bins is used to store trees of free regions. */ + arena_bin_t bins[NBINS]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern ssize_t opt_lg_dirty_mult; +/* + * small_size2bin is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via the SMALL_SIZE2BIN macro. + */ +extern uint8_t const small_size2bin[]; +#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) + +extern arena_bin_info_t arena_bin_info[NBINS]; + +/* Number of large size classes. */ +#define nlclasses (chunk_npages - map_bias) + +void arena_purge_all(arena_t *arena); +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, + size_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, + bool zero); +void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +void *arena_malloc_small(arena_t *arena, size_t size, bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); +size_t arena_salloc(const void *ptr, bool demote); +void arena_prof_promoted(const void *ptr, size_t size); +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm); +void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); +void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache); +bool arena_new(arena_t *arena, unsigned ind); +void arena_boot(void); +void arena_prefork(arena_t *arena); +void arena_postfork_parent(arena_t *arena); +void arena_postfork_child(arena_t *arena); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, + const void *ptr); +prof_ctx_t *arena_prof_ctx_get(const void *ptr); +void arena_prof_ctx_set(const void 
*ptr, prof_ctx_t *ctx); +void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); +void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, + bool try_tcache); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE size_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + size_t binind = bin - arena->bins; + assert(binind < NBINS); + return (binind); +} + +JEMALLOC_INLINE unsigned +arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) +{ + unsigned shift, diff, regind; + size_t interval; + + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); + + /* + * Avoid doing division with a variable divisor if possible. Using + * actual division here can reduce allocator throughput by over 20%! + */ + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - + bin_info->reg0_offset); + + /* Rescale (factor powers of 2 out of the numerator and denominator). */ + interval = bin_info->reg_interval; + shift = ffs(interval) - 1; + diff >>= shift; + interval >>= shift; + + if (interval == 1) { + /* The divisor was a power of 2. */ + regind = diff; + } else { + /* + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and 1 and 2 are both powers of two, which are + * handled above. + */ +#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) + static const unsigned interval_invs[] = { + SIZE_INV(3), + SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), + SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), + SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), + SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), + SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), + SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), + SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) + }; + + if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) + + 2)) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; +#undef SIZE_INV +#undef SIZE_INV_SHIFT + } + assert(diff == regind * interval); + assert(regind < bin_info->nregs); + + return (regind); +} + +JEMALLOC_INLINE prof_ctx_t * +arena_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + cassert(config_prof); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote) + ret = (prof_ctx_t *)(uintptr_t)1U; + else { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind; + + regind = arena_run_regind(run, bin_info, ptr); + ret = *(prof_ctx_t **)((uintptr_t)run + + bin_info->ctx0_offset + (regind * + sizeof(prof_ctx_t *))); + } + } else + ret = chunk->map[pageind-map_bias].prof_ctx; + 
+ return (ret); +} + +JEMALLOC_INLINE void +arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + size_t pageind, mapbits; + + cassert(config_prof); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote == false) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << + LG_PAGE)); + arena_bin_t *bin = run->bin; + size_t binind; + arena_bin_info_t *bin_info; + unsigned regind; + + binind = arena_bin_index(chunk->arena, bin); + bin_info = &arena_bin_info[binind]; + regind = arena_run_regind(run, bin_info, ptr); + + *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset + + (regind * sizeof(prof_ctx_t *)))) = ctx; + } else + assert((uintptr_t)ctx == (uintptr_t)1U); + } else + chunk->map[pageind-map_bias].prof_ctx = ctx; +} + +JEMALLOC_INLINE void * +arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache) +{ + tcache_t *tcache; + + assert(size != 0); + assert(size <= arena_maxclass); + + if (size <= SMALL_MAXCLASS) { + if (try_tcache && (tcache = tcache_get(true)) != NULL) + return (tcache_alloc_small(tcache, size, zero)); + else { + return (arena_malloc_small(choose_arena(arena), size, + zero)); + } + } else { + /* + * Initialize tcache after checking size in order to avoid + * infinite recursion during tcache initialization. + */ + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(true)) != NULL) + return (tcache_alloc_large(tcache, size, zero)); + else { + return (arena_malloc_large(choose_arena(arena), size, + zero)); + } + } +} + +JEMALLOC_INLINE void +arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) +{ + size_t pageind; + arena_chunk_map_t *mapelm; + tcache_t *tcache; + + assert(arena != NULL); + assert(chunk->arena == arena); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapelm = &chunk->map[pageind-map_bias]; + assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + /* Small allocation. 
*/ + if (try_tcache && (tcache = tcache_get(false)) != NULL) + tcache_dalloc_small(tcache, ptr); + else { + arena_run_t *run; + arena_bin_t *bin; + + run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); + bin = run->bin; + if (config_debug) { + size_t binind = arena_bin_index(arena, bin); + UNUSED arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % + bin_info->reg_interval == 0); + } + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin(arena, chunk, ptr, mapelm); + malloc_mutex_unlock(&bin->lock); + } + } else { + size_t size = mapelm->bits & ~PAGE_MASK; + + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + + if (try_tcache && size <= tcache_maxclass && (tcache = + tcache_get(false)) != NULL) { + tcache_dalloc_large(tcache, ptr, size); + } else { + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, ptr); + malloc_mutex_unlock(&arena->lock); + } + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/atomic.h b/contrib/jemalloc/include/jemalloc/internal/atomic.h new file mode 100644 index 000000000000..016c472aba98 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/atomic.h @@ -0,0 +1,276 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#define atomic_read_uint64(p) atomic_add_uint64(p, 0) +#define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +size_t atomic_add_z(size_t *p, size_t x); +size_t atomic_sub_z(size_t *p, size_t x); +unsigned atomic_add_u(unsigned *p, unsigned x); +unsigned atomic_sub_u(unsigned *p, unsigned x); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ +/* 64-bit operations. 
*/ +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} +# elif (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} +# elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + /* + * atomic_fetchadd_64() doesn't exist, but we only ever use this + * function on LP64 systems, so atomic_fetchadd_long() will do. + */ + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)x) + x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + assert(sizeof(uint64_t) == sizeof(unsigned long)); + + return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); +} +# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +# else +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. */ +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. 
*/ + ); + + return (x); +} +#elif (defined(JEMALLOC_ATOMIC9)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, x) + x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); +} +#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif + +/******************************************************************************/ +/* size_t operations. */ +JEMALLOC_INLINE size_t +atomic_add_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE size_t +atomic_sub_z(size_t *p, size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return ((size_t)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_PTR == 2) + return ((size_t)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} + +/******************************************************************************/ +/* unsigned operations. */ +JEMALLOC_INLINE unsigned +atomic_add_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); +#endif +} + +JEMALLOC_INLINE unsigned +atomic_sub_u(unsigned *p, unsigned x) +{ + +#if (LG_SIZEOF_INT == 3) + return ((unsigned)atomic_add_uint64((uint64_t *)p, + (uint64_t)-((int64_t)x))); +#elif (LG_SIZEOF_INT == 2) + return ((unsigned)atomic_add_uint32((uint32_t *)p, + (uint32_t)-((int32_t)x))); +#endif +} +/******************************************************************************/ +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/base.h b/contrib/jemalloc/include/jemalloc/internal/base.h new file mode 100644 index 000000000000..9cf75ffb0b3c --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/base.h @@ -0,0 +1,26 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *base_alloc(size_t size); +void *base_calloc(size_t number, size_t size); +extent_node_t *base_node_alloc(void); +void base_node_dealloc(extent_node_t *node); +bool base_boot(void); +void base_prefork(void); +void base_postfork_parent(void); +void base_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/bitmap.h b/contrib/jemalloc/include/jemalloc/internal/bitmap.h 
new file mode 100644 index 000000000000..605ebac58c17 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/bitmap.h @@ -0,0 +1,184 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* Maximum number of levels possible. */ +#define BITMAP_MAX_LEVELS \ + (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +size_t bitmap_info_ngroups(const bitmap_info_t *binfo); +size_t bitmap_size(size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit) == false); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); + /* Propagate group state transitions up the tree. 
*/ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +} + +/* sfu: set first unset. */ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(bitmap_full(bitmap, binfo) == false); + + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffsl(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); + } + + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit) == false); + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (propagate == false) + break; + } + } +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk.h b/contrib/jemalloc/include/jemalloc/internal/chunk.h new file mode 100644 index 000000000000..e047c2b14348 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/chunk.h @@ -0,0 +1,58 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Size and alignment of memory chunks that are allocated by the OS's virtual + * memory system. + */ +#define LG_CHUNK_DEFAULT 22 + +/* Return the chunk address for allocation address a. */ +#define CHUNK_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~chunksize_mask)) + +/* Return the chunk offset of address a. */ +#define CHUNK_ADDR2OFFSET(a) \ + ((size_t)((uintptr_t)(a) & chunksize_mask)) + +/* Return the smallest chunk multiple that is >= s. */ +#define CHUNK_CEILING(s) \ + (((s) + chunksize_mask) & ~chunksize_mask) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_chunk; + +/* Protects stats_chunks; currently not used for any other purpose. */ +extern malloc_mutex_t chunks_mtx; +/* Chunk statistics. */ +extern chunk_stats_t stats_chunks; + +extern rtree_t *chunks_rtree; + +extern size_t chunksize; +extern size_t chunksize_mask; /* (chunksize - 1). */ +extern size_t chunk_npages; +extern size_t map_bias; /* Number of arena chunk header pages. 
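/*
 * Illustrative sketch, not from the jemalloc sources: typical use of the
 * hierarchical bitmap above, in the way an arena run hands out region
 * indices.  Assumes bitmap_size() reports the required storage in bytes;
 * "nregs" is a hypothetical region count.
 */
static void
demo_bitmap(size_t nregs)
{
	bitmap_info_t binfo;
	bitmap_t *bits;
	size_t bit;

	bitmap_info_init(&binfo, nregs);
	bits = (bitmap_t *)base_alloc(bitmap_size(nregs));
	if (bits == NULL)
		return;
	bitmap_init(bits, &binfo);

	/* bitmap_sfu() finds and sets the lowest unset bit via the tree. */
	while (bitmap_full(bits, &binfo) == false) {
		bit = bitmap_sfu(bits, &binfo);
		/* Region "bit" is now considered allocated. */
		assert(bitmap_get(bits, &binfo, bit));
	}
	/* bitmap_unset(bits, &binfo, bit) would release a region again. */
}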
*/ +extern size_t arena_maxclass; /* Max size class for arenas. */ + +void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero); +void chunk_dealloc(void *chunk, size_t size, bool unmap); +bool chunk_boot0(void); +bool chunk_boot1(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/chunk_dss.h" +#include "jemalloc/internal/chunk_mmap.h" diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h b/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h new file mode 100644 index 000000000000..6e2643b24c3e --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/chunk_dss.h @@ -0,0 +1,24 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero); +bool chunk_in_dss(void *chunk); +bool chunk_dss_boot(void); +void chunk_dss_prefork(void); +void chunk_dss_postfork_parent(void); +void chunk_dss_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/chunk_mmap.h b/contrib/jemalloc/include/jemalloc/internal/chunk_mmap.h new file mode 100644 index 000000000000..04e86af95f4d --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -0,0 +1,22 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *chunk_alloc_mmap(size_t size, size_t alignment); +bool chunk_dealloc_mmap(void *chunk, size_t size); + +bool chunk_mmap_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/ckh.h b/contrib/jemalloc/include/jemalloc/internal/ckh.h new file mode 100644 index 000000000000..05d1fc03e7a2 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/ckh.h @@ -0,0 +1,90 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. 
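/*
 * Illustrative note, not from the jemalloc sources: with the default
 * LG_CHUNK_DEFAULT of 22, chunks are 4 MiB and chunk-aligned, so the owning
 * chunk of any interior pointer is recovered by simple masking.  The
 * address below is hypothetical.
 */
static void
demo_chunk_macros(void)
{
	void *p = (void *)(uintptr_t)0x50034a8;	/* hypothetical pointer */
	void *base = CHUNK_ADDR2BASE(p);	/* 0x5000000, chunk-aligned */
	size_t off = CHUNK_ADDR2OFFSET(p);	/* 0x34a8 */

	assert((uintptr_t)base + off == (uintptr_t)p);
	/* CHUNK_CEILING() rounds a size up to a whole number of chunks. */
	assert(CHUNK_CEILING(1) == chunksize);
}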
*/ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. + */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Hash table cell. */ +struct ckhc_s { + const void *key; + const void *data; +}; + +struct ckh_s { +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. */ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ +#define CKH_A 1103515241 +#define CKH_C 12347 + uint32_t prng_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. + */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. */ + ckhc_t *tab; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(ckh_t *ckh); +size_t ckh_count(ckh_t *ckh); +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); +bool ckh_insert(ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/ctl.h b/contrib/jemalloc/include/jemalloc/internal/ctl.h new file mode 100644 index 000000000000..a48d09fe7929 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/ctl.h @@ -0,0 +1,109 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ctl_node_s { + bool named; + union { + struct { + const char *name; + /* If (nchildren == 0), this is a terminal node. 
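/*
 * Illustrative sketch, not from the jemalloc sources: a minimal
 * string-keyed table built on the cuckoo hash above (the profiler uses it
 * to map backtraces to counters).  Assumes the usual jemalloc convention
 * that these functions return true on failure.
 */
static void
demo_ckh(void)
{
	ckh_t ckh;
	void *key, *data;

	if (ckh_new(&ckh, 16, ckh_string_hash, ckh_string_keycomp))
		return;			/* Allocation failure. */
	if (ckh_insert(&ckh, "answer", (void *)(uintptr_t)42) == false &&
	    ckh_search(&ckh, "answer", &key, &data) == false)
		assert((uintptr_t)data == 42);
	ckh_delete(&ckh);
}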
*/ + unsigned nchildren; + const ctl_node_t *children; + } named; + struct { + const ctl_node_t *(*index)(const size_t *, size_t, + size_t); + } indexed; + } u; + int (*ctl)(const size_t *, size_t, void *, size_t *, void *, + size_t); +}; + +struct ctl_arena_stats_s { + bool initialized; + unsigned nthreads; + size_t pactive; + size_t pdirty; + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t *lstats; /* nlclasses elements. */ +}; + +struct ctl_stats_s { + size_t allocated; + size_t active; + size_t mapped; + struct { + size_t current; /* stats_chunks.curchunks */ + uint64_t total; /* stats_chunks.nchunks */ + size_t high; /* stats_chunks.highchunks */ + } chunks; + struct { + size_t allocated; /* huge_allocated */ + uint64_t nmalloc; /* huge_nmalloc */ + uint64_t ndalloc; /* huge_ndalloc */ + } huge; + ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); + +int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +bool ctl_boot(void); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf(": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/contrib/jemalloc/include/jemalloc/internal/extent.h b/contrib/jemalloc/include/jemalloc/internal/extent.h new file mode 100644 index 000000000000..36af8be8995f --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/extent.h @@ -0,0 +1,43 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct extent_node_s extent_node_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Tree of extents. */ +struct extent_node_s { + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) link_szad; + + /* Linkage for the address-ordered tree. */ + rb_node(extent_node_t) link_ad; + + /* Profile counters, used for huge objects. */ + prof_ctx_t *prof_ctx; + + /* Pointer to the extent that this tree node is responsible for. */ + void *addr; + + /* Total region size. 
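/*
 * Illustrative sketch, not from the jemalloc sources: the xmallctl*()
 * wrappers above are for internal queries that are not expected to fail,
 * e.g. reading a statistic by name ("stats.allocated" is part of the
 * documented mallctl namespace).
 */
static size_t
demo_stats_allocated(void)
{
	size_t allocated, sz = sizeof(allocated);

	xmallctl("stats.allocated", &allocated, &sz, NULL, 0);
	return (allocated);
}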
*/ + size_t size; +}; +typedef rb_tree(extent_node_t) extent_tree_t; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) + +rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/contrib/jemalloc/include/jemalloc/internal/hash.h b/contrib/jemalloc/include/jemalloc/internal/hash.h new file mode 100644 index 000000000000..2f501f5d4c97 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/hash.h @@ -0,0 +1,70 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t hash(const void *key, size_t len, uint64_t seed); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +/* + * The following hash function is based on MurmurHash64A(), placed into the + * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for + * details. + */ +JEMALLOC_INLINE uint64_t +hash(const void *key, size_t len, uint64_t seed) +{ + const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + const int r = 47; + uint64_t h = seed ^ (len * m); + const uint64_t *data = (const uint64_t *)key; + const uint64_t *end = data + (len/8); + const unsigned char *data2; + + assert(((uintptr_t)key & 0x7) == 0); + + while(data != end) { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + data2 = (const unsigned char *)data; + switch(len & 7) { + case 7: h ^= ((uint64_t)(data2[6])) << 48; + case 6: h ^= ((uint64_t)(data2[5])) << 40; + case 5: h ^= ((uint64_t)(data2[4])) << 32; + case 4: h ^= ((uint64_t)(data2[3])) << 24; + case 3: h ^= ((uint64_t)(data2[2])) << 16; + case 2: h ^= ((uint64_t)(data2[1])) << 8; + case 1: h ^= ((uint64_t)(data2[0])); + h *= m; + } + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return (h); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/huge.h b/contrib/jemalloc/include/jemalloc/internal/huge.h new file mode 100644 index 000000000000..e8513c933e24 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/huge.h @@ -0,0 +1,40 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +/* Huge allocation statistics. 
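/*
 * Illustrative sketch, not from the jemalloc sources: hash() above insists
 * on an 8-byte-aligned key (note the assert), so callers hash small,
 * naturally aligned objects.  Here a pointer value is hashed through a
 * uint64_t temporary, in the style of ckh_pointer_hash(); the seed is
 * arbitrary.
 */
static size_t
demo_hash_ptr(const void *ptr)
{
	uint64_t key = (uint64_t)(uintptr_t)ptr;
	uint64_t h = hash(&key, sizeof(key), UINT64_C(0x9e3779b97f4a7c15));

	return ((size_t)h);
}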
*/ +extern uint64_t huge_nmalloc; +extern uint64_t huge_ndalloc; +extern size_t huge_allocated; + +/* Protects chunk-related data structures. */ +extern malloc_mutex_t huge_mtx; + +void *huge_malloc(size_t size, bool zero); +void *huge_palloc(size_t size, size_t alignment, bool zero); +void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra); +void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); +void huge_dalloc(void *ptr, bool unmap); +size_t huge_salloc(const void *ptr); +prof_ctx_t *huge_prof_ctx_get(const void *ptr); +void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +bool huge_boot(void); +void huge_prefork(void); +void huge_postfork_parent(void); +void huge_postfork_child(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h new file mode 100644 index 000000000000..5a3ba1605d44 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h @@ -0,0 +1,883 @@ +#ifndef JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H +#include "libc_private.h" +#include "namespace.h" + +#include +#include +#include +#if !defined(SYS_write) && defined(__NR_write) +#define SYS_write __NR_write +#endif +#include +#include +#include + +#include +#include +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include "un-namespace.h" +#include "libc_private.h" + +#define JEMALLOC_NO_DEMANGLE +#include "../jemalloc.h" + +#ifdef JEMALLOC_UTRACE +#include +#endif + +#ifdef JEMALLOC_VALGRIND +#include +#include +#endif + +#include "jemalloc/internal/private_namespace.h" + +#ifdef JEMALLOC_CC_SILENCE +#define UNUSED JEMALLOC_ATTR(unused) +#else +#define UNUSED +#endif + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool config_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_valgrind = +#ifdef JEMALLOC_VALGRIND + true +#else + false +#endif + ; +static const bool config_xmalloc = 
+#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; + +#ifdef JEMALLOC_ATOMIC9 +#include +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include +#endif + +#ifdef JEMALLOC_ZONE +#include +#include +#include +#include +#endif + +#define RB_COMPACT +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/qr.h" +#include "jemalloc/internal/ql.h" + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. In order to reduce the effect on + * visual code flow, read the header files in multiple passes, with one of the + * following cpp variables defined during each pass: + * + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + */ +/******************************************************************************/ +#define JEMALLOC_H_TYPES + +#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) + +#define ZU(z) ((size_t)z) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#ifdef JEMALLOC_DEBUG + /* Disable inlining to make debugging easier. */ +# define JEMALLOC_INLINE +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# define JEMALLOC_INLINE static inline +#endif + +/* Smallest size class to support. */ +#define LG_TINY_MIN 3 +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# ifdef __i386__ +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# ifdef __sparc64__ +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390x__ +# define LG_QUANTUM 4 +# endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + */ +#define LG_CACHELINE 6 +#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. 
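/*
 * Illustrative note, not from the jemalloc sources: every *_CEILING() macro
 * here relies on the power-of-two identity
 *
 *	ceil(s / 2^k) * 2^k == (s + (2^k - 1)) & ~(2^k - 1)
 *
 * e.g. with LG_QUANTUM == 4 (QUANTUM == 16):
 *
 *	QUANTUM_CEILING(1)  == (1  + 15) & ~15 == 16
 *	QUANTUM_CEILING(17) == (17 + 15) & ~15 == 32
 *	QUANTUM_CEILING(32) == (32 + 15) & ~15 == 32
 */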
*/ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define LG_PAGE STATIC_PAGE_SHIFT +#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define PAGE_MASK ((size_t)(PAGE - 1)) + +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + +#ifdef JEMALLOC_VALGRIND +/* + * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions + * so that when Valgrind reports errors, there are no extra stack frames + * in the backtraces. + * + * The size that is reported to valgrind must be consistent through a chain of + * malloc..realloc..realloc calls. Request size isn't recorded anywhere in + * jemalloc, so it is critical that all callers of these macros provide usize + * rather than request size. As a result, buffer overflow detection is + * technically weakened for the standard API, though it is generally accepted + * practice to consider any extra bytes reported by malloc_usable_size() as + * usable space. + */ +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ + if (config_valgrind && opt_valgrind && cond) \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +} while (0) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) do { \ + if (config_valgrind && opt_valgrind) { \ + size_t rzsize = p2rz(ptr); \ + \ + if (ptr == old_ptr) { \ + VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ + usize, rzsize); \ + if (zero && old_usize < usize) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + old_usize), usize - old_usize); \ + } \ + } else { \ + if (old_ptr != NULL) { \ + VALGRIND_FREELIKE_BLOCK(old_ptr, \ + old_rzsize); \ + } \ + if (ptr != NULL) { \ + size_t copy_size = (old_usize < usize) \ + ? 
old_usize : usize; \ + size_t tail_size = usize - copy_size; \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ + rzsize, false); \ + if (copy_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED(ptr, \ + copy_size); \ + } \ + if (zero && tail_size > 0) { \ + VALGRIND_MAKE_MEM_DEFINED( \ + (void *)((uintptr_t)ptr + \ + copy_size), tail_size); \ + } \ + } \ + } \ + } \ +} while (0) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ + if (config_valgrind && opt_valgrind) \ + VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \ +} while (0) +#else +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) +#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) +#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) +#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) +#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) +#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \ + old_rzsize, zero) +#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) +#endif + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_TYPES +/******************************************************************************/ +#define JEMALLOC_H_STRUCTS + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +/* + * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro + * argument. + */ +#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_CONCAT({0, 0}) + +#undef JEMALLOC_H_STRUCTS +/******************************************************************************/ +#define JEMALLOC_H_EXTERNS + +extern bool opt_abort; +extern bool opt_junk; +extern size_t opt_quarantine; +extern bool opt_redzone; +extern bool opt_utrace; +extern bool opt_valgrind; +extern bool opt_xmalloc; +extern bool opt_zero; +extern size_t opt_narenas; + +/* Number of CPUs. */ +extern unsigned ncpus; + +extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern arena_t **arenas; +extern unsigned narenas; + +arena_t *arenas_extend(unsigned ind); +void arenas_cleanup(void *arg); +arena_t *choose_arena_hard(void); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_EXTERNS +/******************************************************************************/ +#define JEMALLOC_H_INLINES + +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) + +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment); +arena_t *choose_arena(arena_t *arena); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +malloc_tsd_externs(arenas, arena_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup) + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_INLINE size_t +s2u(size_t size) +{ + + if (size <= SMALL_MAXCLASS) + return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); + if (size <= arena_maxclass) + return (PAGE_CEILING(size)); + return (CHUNK_CEILING(size)); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_INLINE size_t +sa2u(size_t size, size_t alignment) +{ + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each small + * size class, every object is aligned at the smallest power of two + * that is non-zero in the base two representation of the size. 
For + * example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = ALIGNMENT_CEILING(size, alignment); + /* + * (usize < size) protects against the combination of maximal + * alignment and size greater than maximal alignment. + */ + if (usize < size) { + /* size_t overflow. */ + return (0); + } + + if (usize <= arena_maxclass && alignment <= PAGE) { + if (usize <= SMALL_MAXCLASS) + return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); + return (PAGE_CEILING(usize)); + } else { + size_t run_size; + + /* + * We can't achieve subpage alignment, so round up alignment + * permanently; it makes later calculations simpler. + */ + alignment = PAGE_CEILING(alignment); + usize = PAGE_CEILING(size); + /* + * (usize < size) protects against very large sizes within + * PAGE of SIZE_T_MAX. + * + * (usize + alignment < usize) protects against the + * combination of maximal alignment and usize large enough + * to cause overflow. This is similar to the first overflow + * check above, but it needs to be repeated due to the new + * usize value, which may now be *equal* to maximal + * alignment, whereas before we only detected overflow if the + * original size was *greater* than maximal alignment. + */ + if (usize < size || usize + alignment < usize) { + /* size_t overflow. */ + return (0); + } + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. + * If the run wouldn't fit within a chunk, round up to a huge + * allocation size. + */ + run_size = usize + alignment - PAGE; + if (run_size <= arena_maxclass) + return (PAGE_CEILING(usize)); + return (CHUNK_CEILING(usize)); + } +} + +/* Choose an arena based on a per-thread value. 
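/*
 * Illustrative note, not from the jemalloc sources: exact results depend on
 * the generated size class tables, but the shape of s2u()/sa2u() is simple.
 * With the default 4 KiB pages and 4 MiB chunks:
 *
 *	s2u(100)             -> the smallest small size class >= 100 bytes
 *	s2u(5000)            -> PAGE_CEILING(5000) == 8192 (a large class)
 *	s2u(5 * 1024 * 1024) -> CHUNK_CEILING(...) == 8 MiB (a huge class)
 *
 * sa2u() first rounds the size up to the requested alignment and returns 0
 * on size_t overflow so that callers can fail the allocation.
 */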
*/ +JEMALLOC_INLINE arena_t * +choose_arena(arena_t *arena) +{ + arena_t *ret; + + if (arena != NULL) + return (arena); + + if ((ret = *arenas_tsd_get()) == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } + + return (ret); +} +#endif + +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/quarantine.h" + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloc(size_t size); +void *icalloc(size_t size); +void *ipalloc(size_t usize, size_t alignment, bool zero); +size_t isalloc(const void *ptr, bool demote); +size_t ivsalloc(const void *ptr, bool demote); +size_t u2rz(size_t usize); +size_t p2rz(const void *ptr); +void idalloc(void *ptr); +void iqalloc(void *ptr); +void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool no_move); +malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t) +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(NULL, size, false, true)); + else + return (huge_malloc(size, false)); +} + +JEMALLOC_INLINE void * +icalloc(size_t size) +{ + + if (size <= arena_maxclass) + return (arena_malloc(NULL, size, true, true)); + else + return (huge_malloc(size, true)); +} + +JEMALLOC_INLINE void * +ipalloc(size_t usize, size_t alignment, bool zero) +{ + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment)); + + if (usize <= arena_maxclass && alignment <= PAGE) + ret = arena_malloc(NULL, usize, zero, true); + else { + if (usize <= arena_maxclass) { + ret = arena_palloc(choose_arena(NULL), usize, alignment, + zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); + } + + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + return (ret); +} + +/* + * Typical usage: + * void *ptr = [...] + * size_t sz = isalloc(ptr, config_prof); + */ +JEMALLOC_INLINE size_t +isalloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + /* Demotion only makes sense if config_prof is true. */ + assert(config_prof || demote == false); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + ret = arena_salloc(ptr, demote); + } else + ret = huge_salloc(ptr); + + return (ret); +} + +JEMALLOC_INLINE size_t +ivsalloc(const void *ptr, bool demote) +{ + + /* Return 0 if ptr is not within a chunk managed by jemalloc. 
*/ + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + return (0); + + return (isalloc(ptr, demote)); +} + +JEMALLOC_INLINE size_t +u2rz(size_t usize) +{ + size_t ret; + + if (usize <= SMALL_MAXCLASS) { + size_t binind = SMALL_SIZE2BIN(usize); + ret = arena_bin_info[binind].redzone_size; + } else + ret = 0; + + return (ret); +} + +JEMALLOC_INLINE size_t +p2rz(const void *ptr) +{ + size_t usize = isalloc(ptr, false); + + return (u2rz(usize)); +} + +JEMALLOC_INLINE void +idalloc(void *ptr) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr, true); + else + huge_dalloc(ptr, true); +} + +JEMALLOC_INLINE void +iqalloc(void *ptr) +{ + + if (config_fill && opt_quarantine) + quarantine(ptr); + else + idalloc(ptr); +} + +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool no_move) +{ + void *ret; + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr, config_prof); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + size_t usize, copysize; + + /* + * Existing object alignment is inadequate; allocate new space + * and copy. + */ + if (no_move) + return (NULL); + usize = sa2u(size + extra, alignment); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + if (ret == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller + * has no expectation that the extra bytes will be reliably + * preserved. + */ + copysize = (size < oldsize) ? 
size : oldsize; + memcpy(ret, ptr, copysize); + iqalloc(ptr); + return (ret); + } + + if (no_move) { + if (size <= arena_maxclass) { + return (arena_ralloc_no_move(ptr, oldsize, size, + extra, zero)); + } else { + return (huge_ralloc_no_move(ptr, oldsize, size, + extra)); + } + } else { + if (size + extra <= arena_maxclass) { + return (arena_ralloc(ptr, oldsize, size, extra, + alignment, zero, true)); + } else { + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } + } +} + +malloc_tsd_externs(thread_allocated, thread_allocated_t) +malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup) +#endif + +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_INLINES +/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/mb.h b/contrib/jemalloc/include/jemalloc/internal/mb.h new file mode 100644 index 000000000000..3cfa7872942b --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/mb.h @@ -0,0 +1,115 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void mb_write(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) +#ifdef __i386__ +/* + * According to the Intel Architecture Software Developer's Manual, current + * processors execute instructions in order from the perspective of other + * processors in a multiprocessor system, but 1) Intel reserves the right to + * change that, and 2) the compiler's optimizer could re-order instructions if + * there weren't some form of barrier. Therefore, even if running on an + * architecture that does not need memory barriers (everything through at least + * i686), an "optimizer barrier" is necessary. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + +# if 0 + /* This is a true memory barrier. */ + asm volatile ("pusha;" + "xor %%eax,%%eax;" + "cpuid;" + "popa;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#else + /* + * This is hopefully enough to keep the compiler from reordering + * instructions around this one. + */ + asm volatile ("nop;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#endif +} +#elif (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("sfence" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__powerpc__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("eieio" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__sparc64__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("membar #StoreStore" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} +#else +/* + * This is much slower than a simple memory barrier, but the semantics of mutex + * unlock make this work. 
+ */ +JEMALLOC_INLINE void +mb_write(void) +{ + malloc_mutex_t mtx; + + malloc_mutex_init(&mtx); + malloc_mutex_lock(&mtx); + malloc_mutex_unlock(&mtx); +} +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/mutex.h b/contrib/jemalloc/include/jemalloc/internal/mutex.h new file mode 100644 index 000000000000..d7133f4d29c0 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/mutex.h @@ -0,0 +1,88 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct malloc_mutex_s malloc_mutex_t; + +#ifdef JEMALLOC_OSSPIN +#define MALLOC_MUTEX_INITIALIZER {0} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +#define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +#else +# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} +# else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} +# endif +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct malloc_mutex_s { +#ifdef JEMALLOC_OSSPIN + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex); +void malloc_mutex_prefork(malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(malloc_mutex_t *mutex); +bool mutex_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(malloc_mutex_t *mutex); +void malloc_mutex_unlock(malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + OSSpinLockLock(&mutex->lock); +#else + pthread_mutex_lock(&mutex->lock); +#endif + } +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + OSSpinLockUnlock(&mutex->lock); +#else + pthread_mutex_unlock(&mutex->lock); +#endif + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h new file mode 100644 index 000000000000..a69482b67262 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -0,0 +1,274 @@ +#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) +#define arena_bin_index JEMALLOC_N(arena_bin_index) +#define arena_boot JEMALLOC_N(arena_boot) +#define arena_dalloc JEMALLOC_N(arena_dalloc) +#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin) +#define arena_dalloc_junk_small 
JEMALLOC_N(arena_dalloc_junk_small) +#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large) +#define arena_malloc JEMALLOC_N(arena_malloc) +#define arena_malloc_large JEMALLOC_N(arena_malloc_large) +#define arena_malloc_small JEMALLOC_N(arena_malloc_small) +#define arena_new JEMALLOC_N(arena_new) +#define arena_palloc JEMALLOC_N(arena_palloc) +#define arena_postfork_child JEMALLOC_N(arena_postfork_child) +#define arena_postfork_parent JEMALLOC_N(arena_postfork_parent) +#define arena_prefork JEMALLOC_N(arena_prefork) +#define arena_prof_accum JEMALLOC_N(arena_prof_accum) +#define arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get) +#define arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set) +#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted) +#define arena_purge_all JEMALLOC_N(arena_purge_all) +#define arena_ralloc JEMALLOC_N(arena_ralloc) +#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) +#define arena_run_regind JEMALLOC_N(arena_run_regind) +#define arena_salloc JEMALLOC_N(arena_salloc) +#define arena_stats_merge JEMALLOC_N(arena_stats_merge) +#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) +#define arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index) +#define arenas_cleanup JEMALLOC_N(arenas_cleanup) +#define arenas_extend JEMALLOC_N(arenas_extend) +#define arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index) +#define arenas_tls JEMALLOC_N(arenas_tls) +#define arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot) +#define arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper) +#define arenas_tsd_get JEMALLOC_N(arenas_tsd_get) +#define arenas_tsd_set JEMALLOC_N(arenas_tsd_set) +#define atomic_add_u JEMALLOC_N(atomic_add_u) +#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32) +#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64) +#define atomic_add_z JEMALLOC_N(atomic_add_z) +#define atomic_sub_u JEMALLOC_N(atomic_sub_u) +#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32) +#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64) +#define atomic_sub_z JEMALLOC_N(atomic_sub_z) +#define base_alloc JEMALLOC_N(base_alloc) +#define base_boot JEMALLOC_N(base_boot) +#define base_calloc JEMALLOC_N(base_calloc) +#define base_node_alloc JEMALLOC_N(base_node_alloc) +#define base_node_dealloc JEMALLOC_N(base_node_dealloc) +#define base_postfork_child JEMALLOC_N(base_postfork_child) +#define base_postfork_parent JEMALLOC_N(base_postfork_parent) +#define base_prefork JEMALLOC_N(base_prefork) +#define bitmap_full JEMALLOC_N(bitmap_full) +#define bitmap_get JEMALLOC_N(bitmap_get) +#define bitmap_info_init JEMALLOC_N(bitmap_info_init) +#define bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups) +#define bitmap_init JEMALLOC_N(bitmap_init) +#define bitmap_set JEMALLOC_N(bitmap_set) +#define bitmap_sfu JEMALLOC_N(bitmap_sfu) +#define bitmap_size JEMALLOC_N(bitmap_size) +#define bitmap_unset JEMALLOC_N(bitmap_unset) +#define bt_init JEMALLOC_N(bt_init) +#define buferror JEMALLOC_N(buferror) +#define choose_arena JEMALLOC_N(choose_arena) +#define choose_arena_hard JEMALLOC_N(choose_arena_hard) +#define chunk_alloc JEMALLOC_N(chunk_alloc) +#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) +#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) +#define chunk_boot0 JEMALLOC_N(chunk_boot0) +#define chunk_boot1 JEMALLOC_N(chunk_boot1) +#define chunk_dealloc JEMALLOC_N(chunk_dealloc) +#define chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap) +#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot) +#define chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child) 
+#define chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent) +#define chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork) +#define chunk_in_dss JEMALLOC_N(chunk_in_dss) +#define chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot) +#define ckh_bucket_search JEMALLOC_N(ckh_bucket_search) +#define ckh_count JEMALLOC_N(ckh_count) +#define ckh_delete JEMALLOC_N(ckh_delete) +#define ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert) +#define ckh_insert JEMALLOC_N(ckh_insert) +#define ckh_isearch JEMALLOC_N(ckh_isearch) +#define ckh_iter JEMALLOC_N(ckh_iter) +#define ckh_new JEMALLOC_N(ckh_new) +#define ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash) +#define ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp) +#define ckh_rebuild JEMALLOC_N(ckh_rebuild) +#define ckh_remove JEMALLOC_N(ckh_remove) +#define ckh_search JEMALLOC_N(ckh_search) +#define ckh_string_hash JEMALLOC_N(ckh_string_hash) +#define ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp) +#define ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert) +#define ckh_try_insert JEMALLOC_N(ckh_try_insert) +#define ctl_boot JEMALLOC_N(ctl_boot) +#define ctl_bymib JEMALLOC_N(ctl_bymib) +#define ctl_byname JEMALLOC_N(ctl_byname) +#define ctl_nametomib JEMALLOC_N(ctl_nametomib) +#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first) +#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert) +#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter) +#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse) +#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start) +#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last) +#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new) +#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next) +#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch) +#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev) +#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch) +#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove) +#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter) +#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse) +#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start) +#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search) +#define extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first) +#define extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert) +#define extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter) +#define extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse) +#define extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start) +#define extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last) +#define extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new) +#define extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next) +#define extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch) +#define extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev) +#define extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch) +#define extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove) +#define extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter) +#define extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse) +#define extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start) +#define extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search) +#define hash JEMALLOC_N(hash) 
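/*
 * Illustrative note, not from the jemalloc sources: every internal symbol
 * is spelled JEMALLOC_N(name) so that, inside the libc build, it can be
 * given a reserved prefix and kept out of the application namespace.  The
 * actual expansion comes from the build glue; with a hypothetical
 *
 *	#define JEMALLOC_N(n) __jemalloc_##n
 *
 * the "#define arena_boot JEMALLOC_N(arena_boot)" entry above makes every
 * reference to arena_boot in these sources resolve to __jemalloc_arena_boot.
 */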
+#define huge_boot JEMALLOC_N(huge_boot) +#define huge_dalloc JEMALLOC_N(huge_dalloc) +#define huge_malloc JEMALLOC_N(huge_malloc) +#define huge_palloc JEMALLOC_N(huge_palloc) +#define huge_postfork_child JEMALLOC_N(huge_postfork_child) +#define huge_postfork_parent JEMALLOC_N(huge_postfork_parent) +#define huge_prefork JEMALLOC_N(huge_prefork) +#define huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get) +#define huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set) +#define huge_ralloc JEMALLOC_N(huge_ralloc) +#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move) +#define huge_salloc JEMALLOC_N(huge_salloc) +#define iallocm JEMALLOC_N(iallocm) +#define icalloc JEMALLOC_N(icalloc) +#define idalloc JEMALLOC_N(idalloc) +#define imalloc JEMALLOC_N(imalloc) +#define ipalloc JEMALLOC_N(ipalloc) +#define iqalloc JEMALLOC_N(iqalloc) +#define iralloc JEMALLOC_N(iralloc) +#define isalloc JEMALLOC_N(isalloc) +#define ivsalloc JEMALLOC_N(ivsalloc) +#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child) +#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent) +#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork) +#define malloc_cprintf JEMALLOC_N(malloc_cprintf) +#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init) +#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock) +#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child) +#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent) +#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork) +#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock) +#define malloc_printf JEMALLOC_N(malloc_printf) +#define malloc_snprintf JEMALLOC_N(malloc_snprintf) +#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax) +#define malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot) +#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register) +#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc) +#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc) +#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup) +#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf) +#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf) +#define malloc_write JEMALLOC_N(malloc_write) +#define mb_write JEMALLOC_N(mb_write) +#define mmap_unaligned_tsd_boot JEMALLOC_N(mmap_unaligned_tsd_boot) +#define mmap_unaligned_tsd_cleanup_wrapper JEMALLOC_N(mmap_unaligned_tsd_cleanup_wrapper) +#define mmap_unaligned_tsd_get JEMALLOC_N(mmap_unaligned_tsd_get) +#define mmap_unaligned_tsd_set JEMALLOC_N(mmap_unaligned_tsd_set) +#define mutex_boot JEMALLOC_N(mutex_boot) +#define opt_abort JEMALLOC_N(opt_abort) +#define opt_junk JEMALLOC_N(opt_junk) +#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk) +#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult) +#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval) +#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample) +#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max) +#define opt_narenas JEMALLOC_N(opt_narenas) +#define opt_prof JEMALLOC_N(opt_prof) +#define opt_prof_accum JEMALLOC_N(opt_prof_accum) +#define opt_prof_active JEMALLOC_N(opt_prof_active) +#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump) +#define opt_prof_leak JEMALLOC_N(opt_prof_leak) +#define opt_stats_print JEMALLOC_N(opt_stats_print) +#define opt_tcache JEMALLOC_N(opt_tcache) +#define opt_utrace JEMALLOC_N(opt_utrace) +#define opt_xmalloc JEMALLOC_N(opt_xmalloc) +#define opt_zero JEMALLOC_N(opt_zero) +#define p2rz JEMALLOC_N(p2rz) +#define pow2_ceil JEMALLOC_N(pow2_ceil) +#define prof_backtrace 
JEMALLOC_N(prof_backtrace) +#define prof_boot0 JEMALLOC_N(prof_boot0) +#define prof_boot1 JEMALLOC_N(prof_boot1) +#define prof_boot2 JEMALLOC_N(prof_boot2) +#define prof_ctx_get JEMALLOC_N(prof_ctx_get) +#define prof_ctx_set JEMALLOC_N(prof_ctx_set) +#define prof_free JEMALLOC_N(prof_free) +#define prof_gdump JEMALLOC_N(prof_gdump) +#define prof_idump JEMALLOC_N(prof_idump) +#define prof_lookup JEMALLOC_N(prof_lookup) +#define prof_malloc JEMALLOC_N(prof_malloc) +#define prof_mdump JEMALLOC_N(prof_mdump) +#define prof_realloc JEMALLOC_N(prof_realloc) +#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update) +#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update) +#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup) +#define prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot) +#define prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper) +#define prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get) +#define prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set) +#define pthread_create JEMALLOC_N(pthread_create) +#define quarantine JEMALLOC_N(quarantine) +#define quarantine_boot JEMALLOC_N(quarantine_boot) +#define quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot) +#define quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper) +#define quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get) +#define quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set) +#define register_zone JEMALLOC_N(register_zone) +#define rtree_get JEMALLOC_N(rtree_get) +#define rtree_get_locked JEMALLOC_N(rtree_get_locked) +#define rtree_new JEMALLOC_N(rtree_new) +#define rtree_set JEMALLOC_N(rtree_set) +#define s2u JEMALLOC_N(s2u) +#define sa2u JEMALLOC_N(sa2u) +#define stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index) +#define stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index) +#define stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index) +#define stats_cactive JEMALLOC_N(stats_cactive) +#define stats_cactive_add JEMALLOC_N(stats_cactive_add) +#define stats_cactive_get JEMALLOC_N(stats_cactive_get) +#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub) +#define stats_print JEMALLOC_N(stats_print) +#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy) +#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large) +#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small) +#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard) +#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate) +#define tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate) +#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large) +#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small) +#define tcache_boot0 JEMALLOC_N(tcache_boot0) +#define tcache_boot1 JEMALLOC_N(tcache_boot1) +#define tcache_create JEMALLOC_N(tcache_create) +#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large) +#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small) +#define tcache_destroy JEMALLOC_N(tcache_destroy) +#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get) +#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set) +#define tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot) +#define tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper) +#define tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get) +#define tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set) +#define tcache_event JEMALLOC_N(tcache_event) +#define tcache_flush JEMALLOC_N(tcache_flush) +#define 
tcache_stats_merge JEMALLOC_N(tcache_stats_merge) +#define tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup) +#define tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot) +#define tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper) +#define tcache_tsd_get JEMALLOC_N(tcache_tsd_get) +#define tcache_tsd_set JEMALLOC_N(tcache_tsd_set) +#define thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot) +#define thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper) +#define thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get) +#define thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set) +#define u2rz JEMALLOC_N(u2rz) diff --git a/contrib/jemalloc/include/jemalloc/internal/prng.h b/contrib/jemalloc/include/jemalloc/internal/prng.h new file mode 100644 index 000000000000..83a5462b4dd0 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/prng.h @@ -0,0 +1,60 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Simple linear congruential pseudo-random number generator: + * + * prng(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example. the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. + * + * Macro parameters: + * uint32_t r : Result. + * unsigned lg_range : (0..32], number of least significant bits to return. + * uint32_t state : Seed value. + * const uint32_t a, c : See above discussion. + */ +#define prng32(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 32); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (32 - lg_range); \ +} while (false) + +/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ +#define prng64(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 64); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (64 - lg_range); \ +} while (false) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/prof.h b/contrib/jemalloc/include/jemalloc/internal/prof.h new file mode 100644 index 000000000000..a37bb4487584 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/prof.h @@ -0,0 +1,535 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_thr_cnt_s prof_thr_cnt_t; +typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. 
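The prng32()/prng64() macros in the prng.h hunk above implement the linear congruential step described in their comment, returning only the upper lg_range bits of the state. A minimal standalone sketch of the 64-bit variant follows; it reuses the multiplier and increment that prof.h later passes for sample-threshold generation, while the macro name, seed, and driver program are purely illustrative and not part of the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as prng64() above: keep only the upper lg_range bits of state. */
#define sketch_prng64(r, lg_range, state, a, c) do {			\
	assert((lg_range) > 0 && (lg_range) <= 64);			\
	r = ((state) * (a)) + (c);					\
	state = r;							\
	r >>= (64 - (lg_range));					\
} while (0)

int
main(void)
{
	uint64_t state = 42;	/* Arbitrary seed. */
	uint64_t r;
	int i;

	/* Draw four 53-bit deviates, as prof_sample_threshold_update() does. */
	for (i = 0; i < 4; i++) {
		sketch_prng64(r, 53, state,
		    UINT64_C(6364136223846793005),
		    UINT64_C(1442695040888963407));
		printf("%ju\n", (uintmax_t)r);
	}
	return (0);
}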
*/ +#define PROF_PREFIX_DEFAULT "jeprof" +#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Maximum number of backtraces to store in each per thread LRU cache. */ +#define PROF_TCMAX 1024 + +/* Initial hash table size. */ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all ctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned nignore; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* + * Profiling counters. An allocation/deallocation pair can operate on + * different prof_thr_cnt_t objects that are linked into the same + * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go + * negative. In principle it is possible for the *bytes counters to + * overflow/underflow, but a general solution would require something + * like 128-bit counters; this implementation doesn't bother to solve + * that problem. + */ + int64_t curobjs; + int64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +struct prof_thr_cnt_s { + /* Linkage into prof_ctx_t's cnts_ql. */ + ql_elm(prof_thr_cnt_t) cnts_link; + + /* Linkage into thread's LRU. */ + ql_elm(prof_thr_cnt_t) lru_link; + + /* + * Associated context. If a thread frees an object that it did not + * allocate, it is possible that the context is not cached in the + * thread's hash table, in which case it must be able to look up the + * context, insert a new prof_thr_cnt_t into the thread's hash table, + * and link it into the prof_ctx_t's cnts_ql. + */ + prof_ctx_t *ctx; + + /* + * Threads use memory barriers to update the counters. Since there is + * only ever one writer, the only challenge is for the reader to get a + * consistent read of the counters. + * + * The writer uses this series of operations: + * + * 1) Increment epoch to an odd number. + * 2) Update counters. + * 3) Increment epoch to an even number. + * + * The reader must assure 1) that the epoch is even while it reads the + * counters, and 2) that the epoch doesn't change between the time it + * starts and finishes reading the counters. + */ + unsigned epoch; + + /* Profiling counters. */ + prof_cnt_t cnts; +}; + +struct prof_ctx_s { + /* Associated backtrace. */ + prof_bt_t *bt; + + /* Protects cnt_merged and cnts_ql. */ + malloc_mutex_t *lock; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* When threads exit, they merge their stats into cnt_merged. */ + prof_cnt_t cnt_merged; + + /* + * List of profile counters, one for each thread that has allocated in + * this context. 
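The epoch scheme documented in prof_thr_cnt_s above is a single-writer seqlock: the writer brackets its counter updates with two epoch increments (odd while mid-update), and a reader retries until it observes the same even epoch before and after copying the counters. A standalone sketch of the reader side follows, with C11 fences standing in for jemalloc's mb_write()-style barriers; the type and function names are illustrative stand-ins, not jemalloc API.

#include <stdatomic.h>
#include <stdint.h>

typedef struct {
	int64_t curobjs;
	int64_t curbytes;
	uint64_t accumobjs;
	uint64_t accumbytes;
} cnt_sketch_t;

typedef struct {
	unsigned epoch;		/* Odd while the writer is mid-update. */
	cnt_sketch_t cnts;
} thr_cnt_sketch_t;

/* Retry until a consistent (even, unchanged) epoch brackets the copy. */
static cnt_sketch_t
read_counters_sketch(const thr_cnt_sketch_t *c)
{
	cnt_sketch_t snap;
	unsigned e0, e1;

	do {
		e0 = c->epoch;
		atomic_thread_fence(memory_order_acquire);
		snap = c->cnts;
		atomic_thread_fence(memory_order_acquire);
		e1 = c->epoch;
	} while ((e0 & 1) != 0 || e0 != e1);
	return (snap);
}

int
main(void)
{
	thr_cnt_sketch_t c = { .epoch = 2, .cnts = { 1, 64, 1, 64 } };
	cnt_sketch_t s = read_counters_sketch(&c);

	return (s.curobjs == 1 ? 0 : 1);
}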
+ */ + ql_head(prof_thr_cnt_t) cnts_ql; +}; + +struct prof_tdata_s { + /* + * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a + * cache of backtraces, with associated thread-specific prof_thr_cnt_t + * objects. Other threads may read the prof_thr_cnt_t contents, but no + * others will ever write them. + * + * Upon thread exit, the thread must merge all the prof_thr_cnt_t + * counter data into the associated prof_ctx_t objects, and unlink/free + * the prof_thr_cnt_t objects. + */ + ckh_t bt2cnt; + + /* LRU for contents of bt2cnt. */ + ql_head(prof_thr_cnt_t) lru_ql; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t threshold; + uint64_t accum; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_prof; +/* + * Even if opt_prof is true, sampling can be temporarily disabled by setting + * opt_prof_active to false. No locking is used when updating opt_prof_active, + * so there are no guarantees regarding how long it will take for all threads + * to notice state changes. + */ +extern bool opt_prof_active; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern char opt_prof_prefix[PATH_MAX + 1]; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * If true, promote small sampled objects to large objects, since small run + * headers do not have embedded profile context pointers. + */ +extern bool prof_promote; + +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt, unsigned nignore); +prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +void prof_idump(void); +bool prof_mdump(const char *filename); +void prof_gdump(void); +prof_tdata_t *prof_tdata_init(void); +void prof_tdata_cleanup(void *arg); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#define PROF_ALLOC_PREP(nignore, size, ret) do { \ + prof_tdata_t *prof_tdata; \ + prof_bt_t bt; \ + \ + assert(size == s2u(size)); \ + \ + prof_tdata = *prof_tdata_tsd_get(); \ + if (prof_tdata == NULL) { \ + prof_tdata = prof_tdata_init(); \ + if (prof_tdata == NULL) { \ + ret = NULL; \ + break; \ + } \ + } \ + \ + if (opt_prof_active == false) { \ + /* Sampling is currently inactive, so avoid sampling. */\ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } else if (opt_lg_prof_sample == 0) { \ + /* Don't bother with sampling logic, since sampling */\ + /* interval is 1. */\ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore); \ + ret = prof_lookup(&bt); \ + } else { \ + if (prof_tdata->threshold == 0) { \ + /* Initialize. Seed the prng differently for */\ + /* each thread. 
*/\ + prof_tdata->prng_state = \ + (uint64_t)(uintptr_t)&size; \ + prof_sample_threshold_update(prof_tdata); \ + } \ + \ + /* Determine whether to capture a backtrace based on */\ + /* whether size is enough for prof_accum to reach */\ + /* prof_tdata->threshold. However, delay updating */\ + /* these variables until prof_{m,re}alloc(), because */\ + /* we don't know for sure that the allocation will */\ + /* succeed. */\ + /* */\ + /* Use subtraction rather than addition to avoid */\ + /* potential integer overflow. */\ + if (size >= prof_tdata->threshold - \ + prof_tdata->accum) { \ + bt_init(&bt, prof_tdata->vec); \ + prof_backtrace(&bt, nignore); \ + ret = prof_lookup(&bt); \ + } else \ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ + } \ +} while (0) + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) + +void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_ctx_t *prof_ctx_get(const void *ptr); +void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +bool prof_sample_accum_update(size_t size); +void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx); +void prof_free(const void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +malloc_tsd_externs(prof_tdata, prof_tdata_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, + prof_tdata_cleanup) + +JEMALLOC_INLINE void +prof_sample_threshold_update(prof_tdata_t *prof_tdata) +{ + uint64_t r; + double u; + + cassert(config_prof); + + /* + * Compute sample threshold as a geometrically distributed random + * variable with mean (2^opt_lg_prof_sample). + * + * __ __ + * | log(u) | 1 + * prof_tdata->threshold = | -------- |, where p = ------------------- + * | log(1-p) | opt_lg_prof_sample + * 2 + * + * For more information on the math, see: + * + * Non-Uniform Random Variate Generation + * Luc Devroye + * Springer-Verlag, New York, 1986 + * pp 500 + * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) + */ + prng64(r, 53, prof_tdata->prng_state, + UINT64_C(6364136223846793005), UINT64_C(1442695040888963407)); + u = (double)r * (1.0/9007199254740992.0L); + prof_tdata->threshold = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + + (uint64_t)1U; +} + +JEMALLOC_INLINE prof_ctx_t * +prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + + cassert(config_prof); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + ret = arena_prof_ctx_get(ptr); + } else + ret = huge_prof_ctx_get(ptr); + + return (ret); +} + +JEMALLOC_INLINE void +prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + + cassert(config_prof); + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + arena_prof_ctx_set(ptr, ctx); + } else + huge_prof_ctx_set(ptr, ctx); +} + +JEMALLOC_INLINE bool +prof_sample_accum_update(size_t size) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + /* Sampling logic is unnecessary if the interval is 1. */ + assert(opt_lg_prof_sample != 0); + + prof_tdata = *prof_tdata_tsd_get(); + assert(prof_tdata != NULL); + + /* Take care to avoid integer overflow. 
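The ASCII formula in prof_sample_threshold_update() above is the inverse-CDF method for drawing a geometrically distributed variable with success probability p = 2^-opt_lg_prof_sample, so the expected byte distance between samples is 2^opt_lg_prof_sample. A self-contained sketch of the same computation, with the PRNG replaced by a caller-supplied uniform deviate; the function name and the example lg value are illustrative only.

#include <math.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
threshold_sketch(double u, unsigned lg_sample)	/* u must lie in (0, 1). */
{
	double p = 1.0 / (double)((uint64_t)1U << lg_sample);

	/* threshold = floor(log(u) / log(1 - p)) + 1, as in the header above. */
	return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
}

int
main(void)
{
	/* Median draw, with an illustrative lg_sample of 19 (512 KiB mean). */
	printf("%ju\n", (uintmax_t)threshold_sketch(0.5, 19));
	return (0);
}

Compile with -lm; the printed value is on the order of 2^19 * ln 2 (about 3.6e5), the median of that geometric distribution.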
*/ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + prof_tdata->accum -= (prof_tdata->threshold - size); + /* Compute new sample threshold. */ + prof_sample_threshold_update(prof_tdata); + while (prof_tdata->accum >= prof_tdata->threshold) { + prof_tdata->accum -= prof_tdata->threshold; + prof_sample_threshold_update(prof_tdata); + } + return (false); + } else { + prof_tdata->accum += size; + return (true); + } +} + +JEMALLOC_INLINE void +prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) +{ + + cassert(config_prof); + assert(ptr != NULL); + assert(size == isalloc(ptr, true)); + + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. For malloc()-like allocation, it is + * always possible to tell in advance how large an + * object's usable size will be, so there should never + * be a difference between the size passed to + * PROF_ALLOC_PREP() and prof_malloc(). + */ + assert((uintptr_t)cnt == (uintptr_t)1U); + } + } + + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + /*********/ + mb_write(); + /*********/ + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx) +{ + prof_thr_cnt_t *told_cnt; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + + if (ptr != NULL) { + assert(size == isalloc(ptr, true)); + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. The size passed to + * PROF_ALLOC_PREP() was larger than what + * actually got allocated, so a backtrace was + * captured for this allocation, even though + * its actual size was insufficient to cross + * the sample threshold. + */ + cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } + } + + if ((uintptr_t)old_ctx > (uintptr_t)1U) { + told_cnt = prof_lookup(old_ctx->bt); + if (told_cnt == NULL) { + /* + * It's too late to propagate OOM for this realloc(), + * so operate directly on old_cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(old_ctx->lock); + old_ctx->cnt_merged.curobjs--; + old_ctx->cnt_merged.curbytes -= old_size; + malloc_mutex_unlock(old_ctx->lock); + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } else + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + cnt->epoch++; + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) { + told_cnt->cnts.curobjs--; + told_cnt->cnts.curbytes -= old_size; + } + if ((uintptr_t)cnt > (uintptr_t)1U) { + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + } + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) + cnt->epoch++; + /*********/ + mb_write(); /* Not strictly necessary. 
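Throughout prof_malloc(), prof_realloc(), and prof_free() above, a prof_thr_cnt_t or prof_ctx_t pointer encodes three states: NULL (no profiling metadata), the (uintptr_t)1U sentinel (allocation tracked but not sampled), and an ordinary pointer (sampled, dereferenceable). A tiny illustration of that convention, using a hypothetical stand-in type rather than the real structures:

#include <stdbool.h>
#include <stdint.h>

typedef struct ctx_sketch_s ctx_sketch_t;	/* Hypothetical stand-in type. */

/* Only values strictly above the 1U sentinel are dereferenceable contexts. */
static inline bool
ctx_is_real_sketch(const ctx_sketch_t *ctx)
{
	return ((uintptr_t)ctx > (uintptr_t)1U);
}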
*/ +} + +JEMALLOC_INLINE void +prof_free(const void *ptr, size_t size) +{ + prof_ctx_t *ctx = prof_ctx_get(ptr); + + cassert(config_prof); + + if ((uintptr_t)ctx > (uintptr_t)1) { + assert(size == isalloc(ptr, true)); + prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + + if (tcnt != NULL) { + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + tcnt->cnts.curobjs--; + tcnt->cnts.curbytes -= size; + /*********/ + mb_write(); + /*********/ + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else { + /* + * OOM during free() cannot be propagated, so operate + * directly on cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(ctx->lock); + ctx->cnt_merged.curobjs--; + ctx->cnt_merged.curbytes -= size; + malloc_mutex_unlock(ctx->lock); + } + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/lib/libc/stdlib/ql.h b/contrib/jemalloc/include/jemalloc/internal/ql.h similarity index 57% rename from lib/libc/stdlib/ql.h rename to contrib/jemalloc/include/jemalloc/internal/ql.h index a1bc3abc238f..a9ed2393f0c2 100644 --- a/lib/libc/stdlib/ql.h +++ b/contrib/jemalloc/include/jemalloc/internal/ql.h @@ -1,40 +1,3 @@ -/****************************************************************************** - * - * Copyright (C) 2002 Jason Evans . - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice(s), this list of conditions and the following disclaimer - * unmodified other than the allowable addition of one or more - * copyright notices. - * 2. Redistributions in binary form must reproduce the above copyright - * notice(s), this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#ifndef QL_H_ -#define QL_H_ - -#include -__FBSDID("$FreeBSD$"); - /* * List definitions. */ @@ -118,5 +81,3 @@ struct { \ #define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) - -#endif /* QL_H_ */ diff --git a/lib/libc/stdlib/qr.h b/contrib/jemalloc/include/jemalloc/internal/qr.h similarity index 56% rename from lib/libc/stdlib/qr.h rename to contrib/jemalloc/include/jemalloc/internal/qr.h index 6e02321be55c..fe22352feddc 100644 --- a/lib/libc/stdlib/qr.h +++ b/contrib/jemalloc/include/jemalloc/internal/qr.h @@ -1,40 +1,3 @@ -/****************************************************************************** - * - * Copyright (C) 2002 Jason Evans . 
- * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice(s), this list of conditions and the following disclaimer - * unmodified other than the allowable addition of one or more - * copyright notices. - * 2. Redistributions in binary form must reproduce the above copyright - * notice(s), this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#ifndef QR_H_ -#define QR_H_ - -#include -__FBSDID("$FreeBSD$"); - /* Ring definitions. */ #define qr(a_type) \ struct { \ @@ -102,5 +65,3 @@ struct { \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ ? (var)->a_field.qre_prev : NULL)) - -#endif /* QR_H_ */ diff --git a/contrib/jemalloc/include/jemalloc/internal/quarantine.h b/contrib/jemalloc/include/jemalloc/internal/quarantine.h new file mode 100644 index 000000000000..38f3d696e93d --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/quarantine.h @@ -0,0 +1,24 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Default per thread quarantine size if valgrind is enabled. */ +#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void quarantine(void *ptr); +bool quarantine_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/lib/libc/stdlib/rb.h b/contrib/jemalloc/include/jemalloc/internal/rb.h similarity index 88% rename from lib/libc/stdlib/rb.h rename to contrib/jemalloc/include/jemalloc/internal/rb.h index baaec7f29c77..4d89ce522016 100644 --- a/lib/libc/stdlib/rb.h +++ b/contrib/jemalloc/include/jemalloc/internal/rb.h @@ -1,35 +1,6 @@ /*- ******************************************************************************* * - * Copyright (C) 2008-2010 Jason Evans . - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice(s), this list of conditions and the following disclaimer - * unmodified other than the allowable addition of one or more - * copyright notices. - * 2. Redistributions in binary form must reproduce the above copyright - * notice(s), this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ****************************************************************************** - * * cpp macro implementation of left-leaning 2-3 red-black trees. Parent * pointers are not used, and color bits are stored in the least significant * bit of right-child pointers (if RB_COMPACT is defined), thus making node @@ -51,8 +22,9 @@ #ifndef RB_H_ #define RB_H_ -#include +#if 0 __FBSDID("$FreeBSD$"); +#endif #ifdef RB_COMPACT /* Node structure. */ @@ -222,11 +194,10 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Arguments: * * a_attr : Function attribute for generated functions (ex: static). - * a_prefix : Prefix for generated functions (ex: extree_). - * a_rb_type : Type for red-black tree data structure (ex: extree_t). - * a_type : Type for red-black tree node data structure (ex: - * extree_node_t). - * a_field : Name of red-black tree node linkage (ex: extree_link). + * a_prefix : Prefix for generated functions (ex: ex_). + * a_rb_type : Type for red-black tree data structure (ex: ex_t). + * a_type : Type for red-black tree node data structure (ex: ex_node_t). + * a_field : Name of red-black tree node linkage (ex: ex_link). * a_cmp : Node comparison function name, with the following prototype: * int (a_cmp *)(a_type *a_node, a_type *a_other); * ^^^^^^ @@ -246,94 +217,94 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * struct ex_node_s { * rb_node(ex_node_t) ex_link; * }; - * typedef rb(ex_node_t) ex_t; - * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp, 1297, 1301) + * typedef rb_tree(ex_node_t) ex_t; + * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) * * The following API is generated: * * static void - * ex_new(ex_t *extree); + * ex_new(ex_t *tree); * Description: Initialize a red-black tree structure. * Args: - * extree: Pointer to an uninitialized red-black tree object. + * tree: Pointer to an uninitialized red-black tree object. * * static ex_node_t * - * ex_first(ex_t *extree); + * ex_first(ex_t *tree); * static ex_node_t * - * ex_last(ex_t *extree); - * Description: Get the first/last node in extree. + * ex_last(ex_t *tree); + * Description: Get the first/last node in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * Ret: First/last node in extree, or NULL if extree is empty. 
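The comment block above documents the functions that rb_gen() emits for a given node type. A compact usage sketch follows; it assumes the rb.h added by this patch is on the include path, and the node type, comparison function, and driver are illustrative rather than anything jemalloc itself builds.

#include <stddef.h>
#include <stdio.h>

#define RB_COMPACT
#include "rb.h"

typedef struct ex_node_s ex_node_t;
struct ex_node_s {
	int key;
	rb_node(ex_node_t) ex_link;	/* Intrusive tree linkage. */
};
typedef rb_tree(ex_node_t) ex_t;

/* Must return negative/zero/positive, per the prototype documented above. */
static int
ex_cmp(ex_node_t *a, ex_node_t *b)
{
	return ((a->key > b->key) - (a->key < b->key));
}

rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)

int
main(void)
{
	ex_t tree;
	ex_node_t nodes[3] = {{ .key = 2 }, { .key = 0 }, { .key = 1 }};
	ex_node_t *n;
	size_t i;

	ex_new(&tree);
	for (i = 0; i < 3; i++)
		ex_insert(&tree, &nodes[i]);
	for (n = ex_first(&tree); n != NULL; n = ex_next(&tree, n))
		printf("%d\n", n->key);		/* Prints 0, 1, 2 in order. */
	return (0);
}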
+ * tree: Pointer to an initialized red-black tree object. + * Ret: First/last node in tree, or NULL if tree is empty. * * static ex_node_t * - * ex_next(ex_t *extree, ex_node_t *node); + * ex_next(ex_t *tree, ex_node_t *node); * static ex_node_t * - * ex_prev(ex_t *extree, ex_node_t *node); + * ex_prev(ex_t *tree, ex_node_t *node); * Description: Get node's successor/predecessor. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : A node in extree. - * Ret: node's successor/predecessor in extree, or NULL if node is + * tree: Pointer to an initialized red-black tree object. + * node: A node in tree. + * Ret: node's successor/predecessor in tree, or NULL if node is * last/first. * * static ex_node_t * - * ex_search(ex_t *extree, ex_node_t *key); + * ex_search(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or NULL if no match. + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *extree, ex_node_t *key); + * ex_nsearch(ex_t *tree, ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *extree, ex_node_t *key); + * ex_psearch(ex_t *tree, ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were - * key in extree. + * key in tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or if no match, hypothetical - * node's successor/predecessor (NULL if no successor/predecessor). + * tree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in tree that matches key, or if no match, hypothetical node's + * successor/predecessor (NULL if no successor/predecessor). * * static void - * ex_insert(ex_t *extree, ex_node_t *node); - * Description: Insert node into extree. + * ex_insert(ex_t *tree, ex_node_t *node); + * Description: Insert node into tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node to be inserted into extree. + * tree: Pointer to an initialized red-black tree object. + * node: Node to be inserted into tree. * * static void - * ex_remove(ex_t *extree, ex_node_t *node); - * Description: Remove node from extree. + * ex_remove(ex_t *tree, ex_node_t *node); + * Description: Remove node from tree. * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node in extree to be removed. + * tree: Pointer to an initialized red-black tree object. + * node: Node in tree to be removed. * * static ex_node_t * - * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); * static ex_node_t * - * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, * ex_node_t *, void *), void *arg); - * Description: Iterate forward/backward over extree, starting at node. - * If extree is modified, iteration must be immediately + * Description: Iterate forward/backward over tree, starting at node. If + * tree is modified, iteration must be immediately * terminated by the callback function that causes the * modification. 
* Args: - * extree: Pointer to an initialized red-black tree object. - * start : Node at which to start iteration, or NULL to start at - * first/last node. - * cb : Callback function, which is called for each node during - * iteration. Under normal circumstances the callback function - * should return NULL, which causes iteration to continue. If a - * callback function returns non-NULL, iteration is immediately - * terminated and the non-NULL return value is returned by the - * iterator. This is useful for re-starting iteration after - * modifying extree. - * arg : Opaque pointer passed to cb(). + * tree : Pointer to an initialized red-black tree object. + * start: Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying tree. + * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. */ diff --git a/contrib/jemalloc/include/jemalloc/internal/rtree.h b/contrib/jemalloc/include/jemalloc/internal/rtree.h new file mode 100644 index 000000000000..95d6355a5f49 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/rtree.h @@ -0,0 +1,161 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * tracking which chunks are currently owned by jemalloc. This functionality + * is mandatory for OS X, where jemalloc must be able to respond to object + * ownership queries. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_s rtree_t; + +/* + * Size of each radix tree node (must be a power of 2). This impacts tree + * depth. + */ +#if (LG_SIZEOF_PTR == 2) +# define RTREE_NODESIZE (1U << 14) +#else +# define RTREE_NODESIZE CACHELINE +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_s { + malloc_mutex_t mutex; + void **root; + unsigned height; + unsigned level2bits[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rtree_t *rtree_new(unsigned bits); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +#ifndef JEMALLOC_DEBUG +void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +#endif +void *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) +#define RTREE_GET_GENERATE(f) \ +/* The least significant bits of the key are ignored. 
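The generated rtree lookup above walks one tree level per iteration, consuming rtree->level2bits[i] of the key's most-significant bits at each step; the shift pair (key << lshift) >> (ptrbits - bits) is what extracts each level's subkey, and any low-order bits not covered by the levels (the within-chunk offset) are ignored. A standalone sketch of just that extraction, with a made-up key and hypothetical level widths:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uintptr_t key = (uintptr_t)0x00007f1234600000ULL; /* Example chunk address. */
	unsigned level2bits[] = { 16, 16 };	/* Hypothetical per-level widths. */
	unsigned lshift = 0, bits, i;
	unsigned ptrbits = sizeof(void *) * 8;

	for (i = 0; i < 2; i++) {
		bits = level2bits[i];
		/* Next `bits` most-significant bits of the key. */
		uintptr_t subkey = (key << lshift) >> (ptrbits - bits);
		printf("level %u subkey: %#jx\n", i, (uintmax_t)subkey);
		lshift += bits;
	}
	return (0);
}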
*/ \ +JEMALLOC_INLINE void * \ +f(rtree_t *rtree, uintptr_t key) \ +{ \ + void *ret; \ + uintptr_t subkey; \ + unsigned i, lshift, height, bits; \ + void **node, **child; \ + \ + RTREE_LOCK(&rtree->mutex); \ + for (i = lshift = 0, height = rtree->height, node = rtree->root;\ + i < height - 1; \ + i++, lshift += bits, node = child) { \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + 3)) - bits); \ + child = (void**)node[subkey]; \ + if (child == NULL) { \ + RTREE_UNLOCK(&rtree->mutex); \ + return (NULL); \ + } \ + } \ + \ + /* \ + * node is a leaf, so it contains values rather than node \ + * pointers. \ + */ \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ + bits); \ + ret = node[subkey]; \ + RTREE_UNLOCK(&rtree->mutex); \ + \ + RTREE_GET_VALIDATE \ + return (ret); \ +} + +#ifdef JEMALLOC_DEBUG +# define RTREE_LOCK(l) malloc_mutex_lock(l) +# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) +# define RTREE_GET_VALIDATE +RTREE_GET_GENERATE(rtree_get_locked) +# undef RTREE_LOCK +# undef RTREE_UNLOCK +# undef RTREE_GET_VALIDATE +#endif + +#define RTREE_LOCK(l) +#define RTREE_UNLOCK(l) +#ifdef JEMALLOC_DEBUG + /* + * Suppose that it were possible for a jemalloc-allocated chunk to be + * munmap()ped, followed by a different allocator in another thread re-using + * overlapping virtual memory, all without invalidating the cached rtree + * value. The result would be a false positive (the rtree would claim that + * jemalloc owns memory that it had actually discarded). This scenario + * seems impossible, but the following assertion is a prudent sanity check. + */ +# define RTREE_GET_VALIDATE \ + assert(rtree_get_locked(rtree, key) == ret); +#else +# define RTREE_GET_VALIDATE +#endif +RTREE_GET_GENERATE(rtree_get) +#undef RTREE_LOCK +#undef RTREE_UNLOCK +#undef RTREE_GET_VALIDATE + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, void *val) +{ + uintptr_t subkey; + unsigned i, lshift, height, bits; + void **node, **child; + + malloc_mutex_lock(&rtree->mutex); + for (i = lshift = 0, height = rtree->height, node = rtree->root; + i < height - 1; + i++, lshift += bits, node = child) { + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + bits); + child = (void**)node[subkey]; + if (child == NULL) { + child = (void**)base_alloc(sizeof(void *) << + rtree->level2bits[i+1]); + if (child == NULL) { + malloc_mutex_unlock(&rtree->mutex); + return (true); + } + memset(child, 0, sizeof(void *) << + rtree->level2bits[i+1]); + node[subkey] = child; + } + } + + /* node is a leaf, so it contains values rather than node pointers. */ + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); + node[subkey] = val; + malloc_mutex_unlock(&rtree->mutex); + + return (false); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/size_classes.h b/contrib/jemalloc/include/jemalloc/internal/size_classes.h new file mode 100644 index 000000000000..821102e5c1cd --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/size_classes.h @@ -0,0 +1,721 @@ +/* This file was automatically generated by size_classes.sh. 
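Each block below defines SIZE_CLASSES as an X-macro table: a consumer defines SIZE_CLASS(bin, delta, sz) to extract whichever column it needs, expands the table, and then undefines the macro. A self-contained sketch of that pattern follows, using a deliberately trimmed, hard-coded table rather than the generated one; the names and the linear scan are illustrative only.

#include <stddef.h>
#include <stdio.h>

#define SKETCH_SIZE_CLASSES	\
	SIZE_CLASS(0, 8, 8)	\
	SIZE_CLASS(1, 8, 16)	\
	SIZE_CLASS(2, 8, 24)	\
	SIZE_CLASS(3, 8, 32)

/* Expand the table once to build an array of class sizes. */
static const size_t class_sizes[] = {
#define SIZE_CLASS(bin, delta, sz)	sz,
	SKETCH_SIZE_CLASSES
#undef SIZE_CLASS
};

/* Smallest class that can hold a request of `size` bytes (linear scan). */
static size_t
size2class_sketch(size_t size)
{
	size_t i;

	for (i = 0; i < sizeof(class_sizes) / sizeof(class_sizes[0]); i++) {
		if (size <= class_sizes[i])
			return (class_sizes[i]);
	}
	return (0);	/* Larger than the largest class in this trimmed table. */
}

int
main(void)
{
	printf("%zu\n", size2class_sketch(13));	/* Prints 16. */
	return (0);
}

jemalloc's real request-to-bin mapping goes through the small_size2bin table mentioned further down rather than a scan; the scan here only keeps the sketch short.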
*/ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 12) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + +#define NBINS 31 +#define SMALL_MAXCLASS 3584 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 13) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + +#define NBINS 35 +#define SMALL_MAXCLASS 7168 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 14) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + SIZE_CLASS(35, 1024, 8192) \ + SIZE_CLASS(36, 2048, 10240) 
\ + SIZE_CLASS(37, 2048, 12288) \ + SIZE_CLASS(38, 2048, 14336) \ + +#define NBINS 39 +#define SMALL_MAXCLASS 14336 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 15) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + SIZE_CLASS(35, 1024, 8192) \ + SIZE_CLASS(36, 2048, 10240) \ + SIZE_CLASS(37, 2048, 12288) \ + SIZE_CLASS(38, 2048, 14336) \ + SIZE_CLASS(39, 2048, 16384) \ + SIZE_CLASS(40, 4096, 20480) \ + SIZE_CLASS(41, 4096, 24576) \ + SIZE_CLASS(42, 4096, 28672) \ + +#define NBINS 43 +#define SMALL_MAXCLASS 28672 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 3 && LG_PAGE == 16) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 8, 24) \ + SIZE_CLASS(3, 8, 32) \ + SIZE_CLASS(4, 8, 40) \ + SIZE_CLASS(5, 8, 48) \ + SIZE_CLASS(6, 8, 56) \ + SIZE_CLASS(7, 8, 64) \ + SIZE_CLASS(8, 16, 80) \ + SIZE_CLASS(9, 16, 96) \ + SIZE_CLASS(10, 16, 112) \ + SIZE_CLASS(11, 16, 128) \ + SIZE_CLASS(12, 32, 160) \ + SIZE_CLASS(13, 32, 192) \ + SIZE_CLASS(14, 32, 224) \ + SIZE_CLASS(15, 32, 256) \ + SIZE_CLASS(16, 64, 320) \ + SIZE_CLASS(17, 64, 384) \ + SIZE_CLASS(18, 64, 448) \ + SIZE_CLASS(19, 64, 512) \ + SIZE_CLASS(20, 128, 640) \ + SIZE_CLASS(21, 128, 768) \ + SIZE_CLASS(22, 128, 896) \ + SIZE_CLASS(23, 128, 1024) \ + SIZE_CLASS(24, 256, 1280) \ + SIZE_CLASS(25, 256, 1536) \ + SIZE_CLASS(26, 256, 1792) \ + SIZE_CLASS(27, 256, 2048) \ + SIZE_CLASS(28, 512, 2560) \ + SIZE_CLASS(29, 512, 3072) \ + SIZE_CLASS(30, 512, 3584) \ + SIZE_CLASS(31, 512, 4096) \ + SIZE_CLASS(32, 1024, 5120) \ + SIZE_CLASS(33, 1024, 6144) \ + SIZE_CLASS(34, 1024, 7168) \ + SIZE_CLASS(35, 1024, 8192) \ + SIZE_CLASS(36, 2048, 10240) \ + SIZE_CLASS(37, 2048, 12288) \ + SIZE_CLASS(38, 2048, 14336) \ + SIZE_CLASS(39, 2048, 16384) \ + SIZE_CLASS(40, 4096, 20480) \ + SIZE_CLASS(41, 4096, 24576) \ + SIZE_CLASS(42, 4096, 28672) \ + SIZE_CLASS(43, 4096, 32768) \ + SIZE_CLASS(44, 8192, 40960) \ + SIZE_CLASS(45, 8192, 49152) \ + SIZE_CLASS(46, 8192, 57344) \ + +#define NBINS 47 +#define SMALL_MAXCLASS 57344 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + 
SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + +#define NBINS 28 +#define SMALL_MAXCLASS 3584 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 13) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + +#define NBINS 32 +#define SMALL_MAXCLASS 7168 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 14) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + SIZE_CLASS(32, 1024, 8192) \ + SIZE_CLASS(33, 2048, 10240) \ + SIZE_CLASS(34, 2048, 12288) \ + SIZE_CLASS(35, 2048, 14336) \ + +#define NBINS 36 +#define SMALL_MAXCLASS 14336 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 15) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 
128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + SIZE_CLASS(32, 1024, 8192) \ + SIZE_CLASS(33, 2048, 10240) \ + SIZE_CLASS(34, 2048, 12288) \ + SIZE_CLASS(35, 2048, 14336) \ + SIZE_CLASS(36, 2048, 16384) \ + SIZE_CLASS(37, 4096, 20480) \ + SIZE_CLASS(38, 4096, 24576) \ + SIZE_CLASS(39, 4096, 28672) \ + +#define NBINS 40 +#define SMALL_MAXCLASS 28672 +#endif + +#if (LG_TINY_MIN == 3 && LG_QUANTUM == 4 && LG_PAGE == 16) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 8, 8) \ + SIZE_CLASS(1, 8, 16) \ + SIZE_CLASS(2, 16, 32) \ + SIZE_CLASS(3, 16, 48) \ + SIZE_CLASS(4, 16, 64) \ + SIZE_CLASS(5, 16, 80) \ + SIZE_CLASS(6, 16, 96) \ + SIZE_CLASS(7, 16, 112) \ + SIZE_CLASS(8, 16, 128) \ + SIZE_CLASS(9, 32, 160) \ + SIZE_CLASS(10, 32, 192) \ + SIZE_CLASS(11, 32, 224) \ + SIZE_CLASS(12, 32, 256) \ + SIZE_CLASS(13, 64, 320) \ + SIZE_CLASS(14, 64, 384) \ + SIZE_CLASS(15, 64, 448) \ + SIZE_CLASS(16, 64, 512) \ + SIZE_CLASS(17, 128, 640) \ + SIZE_CLASS(18, 128, 768) \ + SIZE_CLASS(19, 128, 896) \ + SIZE_CLASS(20, 128, 1024) \ + SIZE_CLASS(21, 256, 1280) \ + SIZE_CLASS(22, 256, 1536) \ + SIZE_CLASS(23, 256, 1792) \ + SIZE_CLASS(24, 256, 2048) \ + SIZE_CLASS(25, 512, 2560) \ + SIZE_CLASS(26, 512, 3072) \ + SIZE_CLASS(27, 512, 3584) \ + SIZE_CLASS(28, 512, 4096) \ + SIZE_CLASS(29, 1024, 5120) \ + SIZE_CLASS(30, 1024, 6144) \ + SIZE_CLASS(31, 1024, 7168) \ + SIZE_CLASS(32, 1024, 8192) \ + SIZE_CLASS(33, 2048, 10240) \ + SIZE_CLASS(34, 2048, 12288) \ + SIZE_CLASS(35, 2048, 14336) \ + SIZE_CLASS(36, 2048, 16384) \ + SIZE_CLASS(37, 4096, 20480) \ + SIZE_CLASS(38, 4096, 24576) \ + SIZE_CLASS(39, 4096, 28672) \ + SIZE_CLASS(40, 4096, 32768) \ + SIZE_CLASS(41, 8192, 40960) \ + SIZE_CLASS(42, 8192, 49152) \ + SIZE_CLASS(43, 8192, 57344) \ + +#define NBINS 44 +#define SMALL_MAXCLASS 57344 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 12) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + +#define NBINS 27 +#define SMALL_MAXCLASS 3584 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 13) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + 
SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + +#define NBINS 31 +#define SMALL_MAXCLASS 7168 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 14) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + SIZE_CLASS(31, 1024, 8192) \ + SIZE_CLASS(32, 2048, 10240) \ + SIZE_CLASS(33, 2048, 12288) \ + SIZE_CLASS(34, 2048, 14336) \ + +#define NBINS 35 +#define SMALL_MAXCLASS 14336 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 15) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + SIZE_CLASS(31, 1024, 8192) \ + SIZE_CLASS(32, 2048, 10240) \ + SIZE_CLASS(33, 2048, 12288) \ + SIZE_CLASS(34, 2048, 14336) \ + SIZE_CLASS(35, 2048, 16384) \ + SIZE_CLASS(36, 4096, 20480) \ + SIZE_CLASS(37, 4096, 24576) \ + SIZE_CLASS(38, 4096, 28672) \ + +#define NBINS 39 +#define SMALL_MAXCLASS 28672 +#endif + +#if (LG_TINY_MIN == 4 && LG_QUANTUM == 4 && LG_PAGE == 16) +#define SIZE_CLASSES_DEFINED +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \ + SIZE_CLASS(0, 16, 16) \ + SIZE_CLASS(1, 16, 32) \ + SIZE_CLASS(2, 16, 48) \ + 
SIZE_CLASS(3, 16, 64) \ + SIZE_CLASS(4, 16, 80) \ + SIZE_CLASS(5, 16, 96) \ + SIZE_CLASS(6, 16, 112) \ + SIZE_CLASS(7, 16, 128) \ + SIZE_CLASS(8, 32, 160) \ + SIZE_CLASS(9, 32, 192) \ + SIZE_CLASS(10, 32, 224) \ + SIZE_CLASS(11, 32, 256) \ + SIZE_CLASS(12, 64, 320) \ + SIZE_CLASS(13, 64, 384) \ + SIZE_CLASS(14, 64, 448) \ + SIZE_CLASS(15, 64, 512) \ + SIZE_CLASS(16, 128, 640) \ + SIZE_CLASS(17, 128, 768) \ + SIZE_CLASS(18, 128, 896) \ + SIZE_CLASS(19, 128, 1024) \ + SIZE_CLASS(20, 256, 1280) \ + SIZE_CLASS(21, 256, 1536) \ + SIZE_CLASS(22, 256, 1792) \ + SIZE_CLASS(23, 256, 2048) \ + SIZE_CLASS(24, 512, 2560) \ + SIZE_CLASS(25, 512, 3072) \ + SIZE_CLASS(26, 512, 3584) \ + SIZE_CLASS(27, 512, 4096) \ + SIZE_CLASS(28, 1024, 5120) \ + SIZE_CLASS(29, 1024, 6144) \ + SIZE_CLASS(30, 1024, 7168) \ + SIZE_CLASS(31, 1024, 8192) \ + SIZE_CLASS(32, 2048, 10240) \ + SIZE_CLASS(33, 2048, 12288) \ + SIZE_CLASS(34, 2048, 14336) \ + SIZE_CLASS(35, 2048, 16384) \ + SIZE_CLASS(36, 4096, 20480) \ + SIZE_CLASS(37, 4096, 24576) \ + SIZE_CLASS(38, 4096, 28672) \ + SIZE_CLASS(39, 4096, 32768) \ + SIZE_CLASS(40, 8192, 40960) \ + SIZE_CLASS(41, 8192, 49152) \ + SIZE_CLASS(42, 8192, 57344) \ + +#define NBINS 43 +#define SMALL_MAXCLASS 57344 +#endif + +#ifndef SIZE_CLASSES_DEFINED +# error "No size class definitions match configuration" +#endif +#undef SIZE_CLASSES_DEFINED +/* + * The small_size2bin lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. Further constrain NBINS to + * 255 to support prof_promote, since all small size classes, plus a "not + * small" size class must be stored in 8 bits of arena_chunk_map_t's bits + * field. + */ +#if (NBINS > 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/stats.h b/contrib/jemalloc/include/jemalloc/internal/stats.h new file mode 100644 index 000000000000..27f68e3681cf --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/stats.h @@ -0,0 +1,173 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct arena_stats_s arena_stats_t; +typedef struct chunk_stats_s chunk_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +struct malloc_bin_stats_s { + /* + * Current number of bytes allocated, including objects currently + * cached by tcache. + */ + size_t allocated; + + /* + * Total number of allocation/deallocation requests served directly by + * the bin. 
Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of runs created for this bin's size class. */ + uint64_t nruns; + + /* + * Total number of runs reused by extracting them from the runs tree for + * this bin's size class. + */ + uint64_t reruns; + + /* Current number of runs in this bin. */ + size_t curruns; +}; + +struct malloc_large_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* Current number of runs of this size class. */ + size_t curruns; +}; + +struct arena_stats_s { + /* Number of bytes currently mapped. */ + size_t mapped; + + /* + * Total number of purge sweeps, total number of madvise calls made, + * and total pages purged in order to keep dirty unused memory under + * control. + */ + uint64_t npurge; + uint64_t nmadvise; + uint64_t purged; + + /* Per-size-category statistics. */ + size_t allocated_large; + uint64_t nmalloc_large; + uint64_t ndalloc_large; + uint64_t nrequests_large; + + /* + * One element for each possible size class, including sizes that + * overlap with bin size classes. This is necessary because ipalloc() + * sometimes has to use such large objects in order to assure proper + * alignment. + */ + malloc_large_stats_t *lstats; +}; + +struct chunk_stats_s { + /* Number of chunks that were allocated. */ + uint64_t nchunks; + + /* High-water mark for number of chunks allocated. */ + size_t highchunks; + + /* + * Current number of chunks allocated. This value isn't maintained for + * any other purpose, so keep track of it in order to be able to set + * highchunks. 
+ */ + size_t curchunks; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_stats_print; + +extern size_t stats_cactive; + +void stats_print(void (*write)(void *, const char *), void *cbopaque, + const char *opts); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t stats_cactive_get(void); +void stats_cactive_add(size_t size); +void stats_cactive_sub(size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) +JEMALLOC_INLINE size_t +stats_cactive_get(void) +{ + + return (atomic_read_z(&stats_cactive)); +} + +JEMALLOC_INLINE void +stats_cactive_add(size_t size) +{ + + atomic_add_z(&stats_cactive, size); +} + +JEMALLOC_INLINE void +stats_cactive_sub(size_t size) +{ + + atomic_sub_z(&stats_cactive, size); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache.h b/contrib/jemalloc/include/jemalloc/internal/tcache.h new file mode 100644 index 000000000000..9d8c992d7f24 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/tcache.h @@ -0,0 +1,494 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_info_s tcache_bin_info_t; +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; + +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per run for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. + */ +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. */ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. 
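The TCACHE_GC_SWEEP / TCACHE_GC_INCR relationship above sets how often one thread-cache bin is garbage-collected. A minimal stand-alone sketch of the arithmetic, not part of the imported sources; the EX_* names are hypothetical and NBINS == 39 is taken from one of the LG_PAGE == 15 size class tables above:

#include <assert.h>

#define EX_NBINS	39	/* e.g. the LG_PAGE == 15 configuration above */
#define EX_GC_SWEEP	8192	/* mirrors TCACHE_GC_SWEEP */
#define EX_GC_INCR	((EX_GC_SWEEP / EX_NBINS) + \
	((EX_GC_SWEEP / EX_NBINS == 0) ? 0 : 1))

int
main(void)
{
	/* One bin is swept every 211 events, so a full pass over all 39 bins
	 * takes 8229 events, slightly more than TCACHE_GC_SWEEP; this is the
	 * integer-rounding caveat mentioned in the comment above. */
	assert(EX_GC_INCR == 211);
	assert(EX_NBINS * EX_GC_INCR == 8229);
	return (0);
}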
*/ +}; + +struct tcache_bin_s { + tcache_bin_stats_t tstats; + int low_water; /* Min # cached since last GC. */ + unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ + unsigned ncached; /* # of cached objects. */ + void **avail; /* Stack of available objects. */ +}; + +struct tcache_s { + ql_elm(tcache_t) link; /* Used for aggregating stats. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ + arena_t *arena; /* This thread's arena. */ + unsigned ev_cnt; /* Event count since incremental GC. */ + unsigned next_gc_bin; /* Next bin to GC. */ + tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; + +extern tcache_bin_info_t *tcache_bin_info; + +/* + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * large-object bins. + */ +extern size_t nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache); +void tcache_arena_associate(tcache_t *tcache, arena_t *arena); +void tcache_arena_dissociate(tcache_t *tcache); +tcache_t *tcache_create(arena_t *arena); +void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, + size_t binind); +void tcache_destroy(tcache_t *tcache); +void tcache_thread_cleanup(void *arg); +void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +bool tcache_boot0(void); +bool tcache_boot1(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *) +malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t) + +void tcache_event(tcache_t *tcache); +void tcache_flush(void); +bool tcache_enabled_get(void); +tcache_t *tcache_get(bool create); +void tcache_enabled_set(bool enabled); +void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); +void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); +void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +/* Map of thread-specific caches. */ +malloc_tsd_externs(tcache, tcache_t *) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL, + tcache_thread_cleanup) +/* Per thread flag that allows thread caches to be disabled. 
*/ +malloc_tsd_externs(tcache_enabled, tcache_enabled_t) +malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t, + tcache_enabled_default, malloc_tsd_no_cleanup) + +JEMALLOC_INLINE void +tcache_flush(void) +{ + tcache_t *tcache; + + cassert(config_tcache); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) + return; + tcache_destroy(tcache); + tcache = NULL; + tcache_tsd_set(&tcache); +} + +JEMALLOC_INLINE bool +tcache_enabled_get(void) +{ + tcache_enabled_t tcache_enabled; + + cassert(config_tcache); + + tcache_enabled = *tcache_enabled_tsd_get(); + if (tcache_enabled == tcache_enabled_default) { + tcache_enabled = (tcache_enabled_t)opt_tcache; + tcache_enabled_tsd_set(&tcache_enabled); + } + + return ((bool)tcache_enabled); +} + +JEMALLOC_INLINE void +tcache_enabled_set(bool enabled) +{ + tcache_enabled_t tcache_enabled; + tcache_t *tcache; + + cassert(config_tcache); + + tcache_enabled = (tcache_enabled_t)enabled; + tcache_enabled_tsd_set(&tcache_enabled); + tcache = *tcache_tsd_get(); + if (enabled) { + if (tcache == TCACHE_STATE_DISABLED) { + tcache = NULL; + tcache_tsd_set(&tcache); + } + } else /* disabled */ { + if (tcache > TCACHE_STATE_MAX) { + tcache_destroy(tcache); + tcache = NULL; + } + if (tcache == NULL) { + tcache = TCACHE_STATE_DISABLED; + tcache_tsd_set(&tcache); + } + } +} + +JEMALLOC_INLINE tcache_t * +tcache_get(bool create) +{ + tcache_t *tcache; + + if (config_tcache == false) + return (NULL); + if (config_lazy_lock && isthreaded == false) + return (NULL); + + tcache = *tcache_tsd_get(); + if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) { + if (tcache == TCACHE_STATE_DISABLED) + return (NULL); + if (tcache == NULL) { + if (create == false) { + /* + * Creating a tcache here would cause + * allocation as a side effect of free(). + * Ordinarily that would be okay since + * tcache_create() failure is a soft failure + * that doesn't propagate. However, if TLS + * data are freed via free() as in glibc, + * subtle corruption could result from setting + * a TLS variable after its backing memory is + * freed. + */ + return (NULL); + } + if (tcache_enabled_get() == false) { + tcache_enabled_set(false); /* Memoize. */ + return (NULL); + } + return (tcache_create(choose_arena(NULL))); + } + if (tcache == TCACHE_STATE_PURGATORY) { + /* + * Make a note that an allocator function was called + * after tcache_thread_cleanup() was called. + */ + tcache = TCACHE_STATE_REINCARNATED; + tcache_tsd_set(&tcache); + return (NULL); + } + if (tcache == TCACHE_STATE_REINCARNATED) + return (NULL); + not_reached(); + } + + return (tcache); +} + +JEMALLOC_INLINE void +tcache_event(tcache_t *tcache) +{ + + if (TCACHE_GC_INCR == 0) + return; + + tcache->ev_cnt++; + assert(tcache->ev_cnt <= TCACHE_GC_INCR); + if (tcache->ev_cnt == TCACHE_GC_INCR) { + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low + * water mark. + */ + if (binind < NBINS) { + tcache_bin_flush_small(tbin, binind, + tbin->ncached - tbin->low_water + + (tbin->low_water >> 2), tcache); + } else { + tcache_bin_flush_large(tbin, binind, + tbin->ncached - tbin->low_water + + (tbin->low_water >> 2), tcache); + } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. 
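To make the low-water flush in tcache_event() above concrete: the third argument passed to tcache_bin_flush_small()/tcache_bin_flush_large() is the number of objects to retain, so roughly three quarters of the objects that sat below the low-water mark since the last GC pass are flushed. A stand-alone sketch with hypothetical ex_* names, not part of the imported sources:

#include <assert.h>

/* Number of objects kept when a bin is incrementally GCed, mirroring the
 * "ncached - low_water + (low_water >> 2)" expression above. */
static unsigned
ex_flush_keep(unsigned ncached, unsigned low_water)
{
	return (ncached - low_water + (low_water >> 2));
}

int
main(void)
{
	/* 16 cached objects with a low-water mark of 10: 8 are kept and
	 * 8 == ceil(3/4 * 10) are flushed back to the arena. */
	assert(ex_flush_keep(16, 10) == 8);
	assert(16 - ex_flush_keep(16, 10) == 8);
	return (0);
}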
+ */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) + >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div + * stays greater than 0. + */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; + } +} + +JEMALLOC_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin) +{ + void *ret; + + if (tbin->ncached == 0) { + tbin->low_water = -1; + return (NULL); + } + tbin->ncached--; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; + ret = tbin->avail[tbin->ncached]; + return (ret); +} + +JEMALLOC_INLINE void * +tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + binind = SMALL_SIZE2BIN(size); + assert(binind < NBINS); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + ret = tcache_alloc_small_hard(tcache, tbin, binind); + if (ret == NULL) + return (NULL); + } + assert(arena_salloc(ret, false) == arena_bin_info[binind].reg_size); + + if (zero == false) { + if (config_fill) { + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) + memset(ret, 0, size); + } + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; + tcache_event(tcache); + return (ret); +} + +JEMALLOC_INLINE void * +tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + size = PAGE_CEILING(size); + assert(size <= tcache_maxclass); + binind = NBINS + (size >> LG_PAGE) - 1; + assert(binind < nhbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. 
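The large-object paths above index tbins with NBINS + (size >> LG_PAGE) - 1, i.e. one bin per page-multiple size class beyond the small bins. A worked sketch, assuming a 4 KiB page (LG_PAGE == 12) and the NBINS == 27 value from the corresponding table above; the EX_* names are hypothetical:

#include <assert.h>
#include <stddef.h>

#define EX_LG_PAGE	12	/* 4 KiB pages */
#define EX_NBINS	27	/* small bins in the LG_PAGE == 12 tables */

int
main(void)
{
	size_t size = 8192;	/* two pages; already a PAGE_CEILING() multiple */
	size_t binind = EX_NBINS + (size >> EX_LG_PAGE) - 1;

	/* The first large bin (index EX_NBINS) caches single-page runs, so a
	 * two-page request lands in the bin after it. */
	assert(binind == EX_NBINS + 1);
	return (0);
}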
+ */ + ret = arena_malloc_large(tcache->arena, size, zero); + if (ret == NULL) + return (NULL); + } else { + if (config_prof) { + arena_chunk_t *chunk = + (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + LG_PAGE); + chunk->map[pageind-map_bias].bits &= + ~CHUNK_MAP_CLASS_MASK; + } + if (zero == false) { + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + } else { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + + if (config_stats) + tbin->tstats.nrequests++; + if (config_prof) + tcache->prof_accumbytes += size; + } + + tcache_event(tcache); + return (ret); +} + +JEMALLOC_INLINE void +tcache_dalloc_small(tcache_t *tcache, void *ptr) +{ + arena_t *arena; + arena_chunk_t *chunk; + arena_run_t *run; + arena_bin_t *bin; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + size_t pageind, binind; + arena_chunk_map_t *mapelm; + + assert(arena_salloc(ptr, false) <= SMALL_MAXCLASS); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapelm = &chunk->map[pageind-map_bias]; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (mapelm->bits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / + sizeof(arena_bin_t); + assert(binind < NBINS); + + if (config_fill && opt_junk) + arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> + 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; + tbin->ncached++; + + tcache_event(tcache); +} + +JEMALLOC_INLINE void +tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +{ + size_t binind; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert((size & PAGE_MASK) == 0); + assert(arena_salloc(ptr, false) > SMALL_MAXCLASS); + assert(arena_salloc(ptr, false) <= tcache_maxclass); + + binind = NBINS + (size >> LG_PAGE) - 1; + + if (config_fill && opt_junk) + memset(ptr, 0x5a, size); + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> + 1), tcache); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; + tbin->ncached++; + + tcache_event(tcache); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h new file mode 100644 index 000000000000..35ae5e3c78bd --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -0,0 +1,309 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum number of malloc_tsd users with cleanup functions. */ +#define MALLOC_TSD_CLEANUPS_MAX 8 + +typedef struct malloc_tsd_cleanup_s malloc_tsd_cleanup_t; +struct malloc_tsd_cleanup_s { + bool (*f)(void *); + void *arg; +}; + +/* + * TLS/TSD-agnostic macro-based implementation of thread-specific data. There + * are four macros that support (at least) three use cases: file-private, + * library-private, and library-private inlined. 
Following is an example + * library-private tsd variable: + * + * In example.h: + * typedef struct { + * int x; + * int y; + * } example_t; + * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) + * malloc_tsd_protos(, example, example_t *) + * malloc_tsd_externs(example, example_t *) + * In example.c: + * malloc_tsd_data(, example, example_t *, EX_INITIALIZER) + * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER, + * example_tsd_cleanup) + * + * The result is a set of generated functions, e.g.: + * + * bool example_tsd_boot(void) {...} + * example_t **example_tsd_get() {...} + * void example_tsd_set(example_t **val) {...} + * + * Note that all of the functions deal in terms of (a_type *) rather than + * (a_type) so that it is possible to support non-pointer types (unlike + * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is + * cast to (void *). This means that the cleanup function needs to cast *and* + * dereference the function argument, e.g.: + * + * void + * example_tsd_cleanup(void *arg) + * { + * example_t *example = *(example_t **)arg; + * + * [...] + * if ([want the cleanup function to be called again]) { + * example_tsd_set(&example); + * } + * } + * + * If example_tsd_set() is called within example_tsd_cleanup(), it will be + * called again. This is similar to how pthreads TSD destruction works, except + * that pthreads only calls the cleanup function again if the value was set to + * non-NULL. + */ + +/* malloc_tsd_protos(). */ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ +a_attr bool \ +a_name##_tsd_boot(void); \ +a_attr a_type * \ +a_name##_tsd_get(void); \ +a_attr void \ +a_name##_tsd_set(a_type *val); + +/* malloc_tsd_externs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern __thread bool a_name##_initialized; \ +extern bool a_name##_booted; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_externs(a_name, a_type) \ +extern __thread a_type a_name##_tls; \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#else +#define malloc_tsd_externs(a_name, a_type) \ +extern pthread_key_t a_name##_tsd; \ +extern bool a_name##_booted; +#endif + +/* malloc_tsd_data(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##_tls = a_initializer; \ +a_attr __thread bool JEMALLOC_TLS_MODEL \ + a_name##_initialized = false; \ +a_attr bool a_name##_booted = false; +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr __thread a_type JEMALLOC_TLS_MODEL \ + a_name##_tls = a_initializer; \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#else +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +a_attr pthread_key_t a_name##_tsd; \ +a_attr bool a_name##_booted = false; +#endif + +/* malloc_tsd_funcs(). */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. 
*/ \ +a_attr bool \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + bool (*cleanup)(void *) = arg; \ + \ + if (a_name##_initialized) { \ + a_name##_initialized = false; \ + cleanup(&a_name##_tls); \ + } \ + return (a_name##_initialized); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + malloc_tsd_cleanup_register( \ + &a_name##_tsd_cleanup_wrapper, a_cleanup); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + a_name##_initialized = true; \ +} +#elif (defined(JEMALLOC_TLS)) +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Initialization/cleanup. */ \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \ + return (true); \ + } \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. */ \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + \ + assert(a_name##_booted); \ + return (&a_name##_tls); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + \ + assert(a_name##_booted); \ + a_name##_tls = (*val); \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)(&a_name##_tls))) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ +} +#else +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ + a_cleanup) \ +/* Data structure. */ \ +typedef struct { \ + bool isstatic; \ + bool initialized; \ + a_type val; \ +} a_name##_tsd_wrapper_t; \ +/* Initialization/cleanup. */ \ +a_attr void \ +a_name##_tsd_cleanup_wrapper(void *arg) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\ + \ + if (a_cleanup != malloc_tsd_no_cleanup && \ + wrapper->initialized) { \ + wrapper->initialized = false; \ + a_cleanup(&wrapper->val); \ + if (wrapper->initialized) { \ + /* Trigger another cleanup round. */ \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error" \ + " setting TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + return; \ + } \ + } \ + if (wrapper->isstatic == false) \ + malloc_tsd_dalloc(wrapper); \ +} \ +a_attr bool \ +a_name##_tsd_boot(void) \ +{ \ + \ + if (pthread_key_create(&a_name##_tsd, \ + a_name##_tsd_cleanup_wrapper) != 0) \ + return (true); \ + a_name##_booted = true; \ + return (false); \ +} \ +/* Get/set. 
*/ \ +a_attr a_name##_tsd_wrapper_t * \ +a_name##_tsd_get_wrapper(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \ + pthread_getspecific(a_name##_tsd); \ + \ + if (wrapper == NULL) { \ + wrapper = (a_name##_tsd_wrapper_t *) \ + malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \ + if (wrapper == NULL) { \ + static a_name##_tsd_wrapper_t \ + a_name##_tsd_static_data = \ + {true, false, a_initializer}; \ + malloc_write(": Error allocating" \ + " TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + wrapper = &a_name##_tsd_static_data; \ + } else { \ + static a_type tsd_static_data = a_initializer; \ + wrapper->isstatic = false; \ + wrapper->val = tsd_static_data; \ + } \ + if (pthread_setspecific(a_name##_tsd, \ + (void *)wrapper)) { \ + malloc_write(": Error setting" \ + " TSD for "#a_name"\n"); \ + if (opt_abort) \ + abort(); \ + } \ + } \ + return (wrapper); \ +} \ +a_attr a_type * \ +a_name##_tsd_get(void) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + return (&wrapper->val); \ +} \ +a_attr void \ +a_name##_tsd_set(a_type *val) \ +{ \ + a_name##_tsd_wrapper_t *wrapper; \ + \ + assert(a_name##_booted); \ + wrapper = a_name##_tsd_get_wrapper(); \ + wrapper->val = *(val); \ + if (a_cleanup != malloc_tsd_no_cleanup) \ + wrapper->initialized = true; \ +} +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *); +void malloc_tsd_cleanup_register(bool (*f)(void *), void *arg); +void malloc_tsd_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/internal/util.h b/contrib/jemalloc/include/jemalloc/internal/util.h new file mode 100644 index 000000000000..d360ae3f5a8f --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/util.h @@ -0,0 +1,146 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_CONCAT(...) __VA_ARGS__ + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. 
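JEMALLOC_CONCAT() above exists so that an initializer containing a comma (such as the EX_INITIALIZER example in tsd.h) can be passed through other macros without being split into multiple arguments. A minimal stand-alone illustration, not part of the imported sources; the EX_* names are hypothetical:

#define JEMALLOC_CONCAT(...) __VA_ARGS__

/* A one-argument macro that consumes an initializer. */
#define EX_DEFINE_PAIR(a_init) static int ex_pair[2] = a_init;

/*
 * EX_DEFINE_PAIR({0, 0}) would be rejected, because the comma splits the text
 * into two macro arguments. Wrapping the initializer keeps it whole: the
 * comma now sits inside the wrapper's parentheses, and the wrapper expands
 * back to "{0, 0}" before substitution.
 */
EX_DEFINE_PAIR(JEMALLOC_CONCAT({0, 0}))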
+ */ +#ifndef assert +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if ((c) == false) \ + assert(false); \ +} while (0) + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#define assert_not_implemented(e) do { \ + if (config_debug && !(e)) \ + not_implemented(); \ +} while (0) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern void (*je_malloc_message)(void *wcbopaque, const char *s); + +int buferror(int errnum, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *nptr, char **endptr, int base); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +int malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +int malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) + JEMALLOC_ATTR(format(printf, 1, 2)); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +void malloc_write(const char *s); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + +/* + * Wrapper around malloc_message() that avoids the need for + * je_malloc_message(...) throughout the code. 
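pow2_ceil() above rounds up to the next power of two by smearing the highest set bit into every lower bit position and then adding one. A stand-alone copy with a few checks, assuming a 64-bit size_t (LG_SIZEOF_PTR == 3); not part of the imported sources:

#include <assert.h>
#include <stddef.h>

static size_t
ex_pow2_ceil(size_t x)
{
	/* Decrement first so that exact powers of two map to themselves. */
	x--;
	/* Each OR doubles the run of set bits below the highest set bit. */
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	x |= x >> 32;	/* only meaningful when size_t is 64 bits wide */
	x++;
	return (x);
}

int
main(void)
{
	assert(ex_pow2_ceil(1) == 1);
	assert(ex_pow2_ceil(4096) == 4096);
	assert(ex_pow2_ceil(4097) == 8192);
	return (0);
}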
+ */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + je_malloc_message(NULL, s); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h new file mode 100644 index 000000000000..d310f46d790f --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -0,0 +1,141 @@ +#ifndef JEMALLOC_H_ +#define JEMALLOC_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#define JEMALLOC_VERSION "1.0.0-266-gb57d3ec571c6551231be62b7bf92c084a8c8291c" +#define JEMALLOC_VERSION_MAJOR 1 +#define JEMALLOC_VERSION_MINOR 0 +#define JEMALLOC_VERSION_BUGFIX 0 +#define JEMALLOC_VERSION_NREV 266 +#define JEMALLOC_VERSION_GID "b57d3ec571c6551231be62b7bf92c084a8c8291c" + +#include "jemalloc_defs.h" +#include "jemalloc_FreeBSD.h" + +#ifdef JEMALLOC_EXPERIMENTAL +#define ALLOCM_LG_ALIGN(la) (la) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +#endif +#define ALLOCM_ZERO ((int)0x40) +#define ALLOCM_NO_MOVE ((int)0x80) + +#define ALLOCM_SUCCESS 0 +#define ALLOCM_ERR_OOM 1 +#define ALLOCM_ERR_NOT_MOVED 2 +#endif + +/* + * The je_ prefix on the following public symbol declarations is an artifact of + * namespace management, and should be omitted in application code unless + * JEMALLOC_NO_DEMANGLE is defined (see below). + */ +extern const char *je_malloc_conf; +extern void (*je_malloc_message)(void *, const char *); + +void *je_malloc(size_t size) JEMALLOC_ATTR(malloc); +void *je_calloc(size_t num, size_t size) JEMALLOC_ATTR(malloc); +int je_posix_memalign(void **memptr, size_t alignment, size_t size) + JEMALLOC_ATTR(nonnull(1)); +void *je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_ATTR(malloc); +void *je_realloc(void *ptr, size_t size); +void je_free(void *ptr); + +size_t je_malloc_usable_size(const void *ptr); +void je_malloc_stats_print(void (*write_cb)(void *, const char *), + void *je_cbopaque, const char *opts); +int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp); +int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); + +#ifdef JEMALLOC_EXPERIMENTAL +int je_allocm(void **ptr, size_t *rsize, size_t size, int flags) + JEMALLOC_ATTR(nonnull(1)); +int je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, + int flags) JEMALLOC_ATTR(nonnull(1)); +int je_sallocm(const void *ptr, size_t *rsize, int flags) + JEMALLOC_ATTR(nonnull(1)); +int je_dallocm(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); +int je_nallocm(size_t *rsize, size_t size, int flags); +#endif + +/* + * By default application code must explicitly refer to mangled symbol names, + * so that it is possible to use jemalloc in conjunction with another allocator + * in the same application. Define JEMALLOC_MANGLE in order to cause automatic + * name mangling that matches the API prefixing that happened as a result of + * --with-mangling and/or --with-jemalloc-prefix configuration settings. 
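The mallctl()/malloc_stats_print() entry points declared above are the allocator's introspection interface. A small usage sketch, assuming the prototypes are in scope (FreeBSD is expected to expose them via malloc_np.h; upstream builds use jemalloc/jemalloc.h) and that statistics support is compiled in; "stats.allocated" is one of the documented MALLCTL NAMESPACE names:

#include <stdio.h>
#include <stddef.h>
#include <malloc_np.h>	/* assumed location of the non-standard prototypes */

int
main(void)
{
	size_t allocated, sz = sizeof(allocated);

	/* Read the total number of bytes currently allocated by the
	 * application; only meaningful when JEMALLOC_STATS is defined. */
	if (mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0)
		printf("allocated: %zu bytes\n", allocated);

	/* Dump the full human-readable report via the default output
	 * callback. */
	malloc_stats_print(NULL, NULL, NULL);
	return (0);
}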
+ */ +#ifdef JEMALLOC_MANGLE +#ifndef JEMALLOC_NO_DEMANGLE +#define JEMALLOC_NO_DEMANGLE +#endif +#define malloc_conf je_malloc_conf +#define malloc_message je_malloc_message +#define malloc je_malloc +#define calloc je_calloc +#define posix_memalign je_posix_memalign +#define aligned_alloc je_aligned_alloc +#define realloc je_realloc +#define free je_free +#define malloc_usable_size je_malloc_usable_size +#define malloc_stats_print je_malloc_stats_print +#define mallctl je_mallctl +#define mallctlnametomib je_mallctlnametomib +#define mallctlbymib je_mallctlbymib +#define memalign je_memalign +#define valloc je_valloc +#ifdef JEMALLOC_EXPERIMENTAL +#define allocm je_allocm +#define rallocm je_rallocm +#define sallocm je_sallocm +#define dallocm je_dallocm +#define nallocm je_nallocm +#endif +#endif + +/* + * The je_* macros can be used as stable alternative names for the public + * jemalloc API if JEMALLOC_NO_DEMANGLE is defined. This is primarily meant + * for use in jemalloc itself, but it can be used by application code to + * provide isolation from the name mangling specified via --with-mangling + * and/or --with-jemalloc-prefix. + */ +#ifndef JEMALLOC_NO_DEMANGLE +#undef je_malloc_conf +#undef je_malloc_message +#undef je_malloc +#undef je_calloc +#undef je_posix_memalign +#undef je_aligned_alloc +#undef je_realloc +#undef je_free +#undef je_malloc_usable_size +#undef je_malloc_stats_print +#undef je_mallctl +#undef je_mallctlnametomib +#undef je_mallctlbymib +#undef je_memalign +#undef je_valloc +#ifdef JEMALLOC_EXPERIMENTAL +#undef je_allocm +#undef je_rallocm +#undef je_sallocm +#undef je_dallocm +#undef je_nallocm +#endif +#endif + +#ifdef __cplusplus +}; +#endif +#endif /* JEMALLOC_H_ */ diff --git a/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h b/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h new file mode 100644 index 000000000000..2c5797fde876 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h @@ -0,0 +1,76 @@ +/* + * Override settings that were generated in jemalloc_defs.h as necessary. + */ + +#undef JEMALLOC_OVERRIDE_VALLOC + +#ifndef MALLOC_PRODUCTION +#define JEMALLOC_DEBUG +#endif + +/* + * The following are architecture-dependent, so conditionally define them for + * each supported architecture. + */ +#undef CPU_SPINWAIT +#undef JEMALLOC_TLS_MODEL +#undef STATIC_PAGE_SHIFT +#undef LG_SIZEOF_PTR +#undef LG_SIZEOF_INT +#undef LG_SIZEOF_LONG +#undef LG_SIZEOF_INTMAX_T + +#ifdef __i386__ +# define LG_SIZEOF_PTR 2 +# define CPU_SPINWAIT __asm__ volatile("pause") +# define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) +#endif +#ifdef __ia64__ +# define LG_SIZEOF_PTR 3 +#endif +#ifdef __sparc64__ +# define LG_SIZEOF_PTR 3 +# define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) +#endif +#ifdef __amd64__ +# define LG_SIZEOF_PTR 3 +# define CPU_SPINWAIT __asm__ volatile("pause") +# define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) +#endif +#ifdef __arm__ +# define LG_SIZEOF_PTR 2 +#endif +#ifdef __mips__ +# define LG_SIZEOF_PTR 2 +#endif +#ifdef __powerpc64__ +# define LG_SIZEOF_PTR 3 +#elif defined(__powerpc__) +# define LG_SIZEOF_PTR 2 +#endif + +#ifndef JEMALLOC_TLS_MODEL +# define JEMALLOC_TLS_MODEL /* Default. */ +#endif +#ifdef __clang__ +# undef JEMALLOC_TLS_MODEL +# define JEMALLOC_TLS_MODEL /* clang does not support tls_model yet. 
*/ +#endif + +#define STATIC_PAGE_SHIFT PAGE_SHIFT +#define LG_SIZEOF_INT 2 +#define LG_SIZEOF_LONG LG_SIZEOF_PTR +#define LG_SIZEOF_INTMAX_T 3 + +/* Disable lazy-lock machinery, mangle isthreaded, and adjust its type. */ +#undef JEMALLOC_LAZY_LOCK +extern int __isthreaded; +#define isthreaded ((bool)__isthreaded) + +/* Mangle. */ +#define open _open +#define read _read +#define write _write +#define close _close +#define pthread_mutex_lock _pthread_mutex_lock +#define pthread_mutex_unlock _pthread_mutex_unlock diff --git a/contrib/jemalloc/include/jemalloc/jemalloc_defs.h b/contrib/jemalloc/include/jemalloc/jemalloc_defs.h new file mode 100644 index 000000000000..31dc8382e48c --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/jemalloc_defs.h @@ -0,0 +1,242 @@ +/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Name mangling for public symbols is controlled by --with-mangling and + * --with-jemalloc-prefix. With default settings the je_ prefix is stripped by + * these macro definitions. + */ +#define je_malloc_conf malloc_conf +#define je_malloc_message malloc_message +#define je_malloc malloc +#define je_calloc calloc +#define je_posix_memalign posix_memalign +#define je_aligned_alloc aligned_alloc +#define je_realloc realloc +#define je_free free +#define je_malloc_usable_size malloc_usable_size +#define je_malloc_stats_print malloc_stats_print +#define je_mallctl mallctl +#define je_mallctlnametomib mallctlnametomib +#define je_mallctlbymib mallctlbymib +/* #undef je_memalign */ +#define je_valloc valloc +#define je_allocm allocm +#define je_rallocm rallocm +#define je_sallocm sallocm +#define je_dallocm dallocm +#define je_nallocm nallocm + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE "" +#define JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix) string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT __asm__ volatile("pause") + +/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */ +#define JEMALLOC_ATOMIC9 1 + +/* + * Defined if OSAtomic*() functions are available, as provided by Darwin, and + * documented in the atomic(3) manual page. 
+ */ +/* #undef JEMALLOC_OSATOMIC */ + +/* + * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and + * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ + +/* + * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and + * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite + * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the + * functions are defined in libgcc instead of being inlines) + */ +/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +/* #undef JEMALLOC_OSSPIN */ + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +#define JEMALLOC_MALLOC_THREAD_CLEANUP + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +/* #undef JEMALLOC_THREADED_INIT */ + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +#define JEMALLOC_MUTEX_INIT_CB 1 + +/* Defined if __attribute__((...)) syntax is supported. */ +#define JEMALLOC_HAVE_ATTR +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_CATTR(s, a) __attribute__((s)) +# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +#else +# define JEMALLOC_CATTR(s, a) a +# define JEMALLOC_ATTR(s) JEMALLOC_CATTR(s,) +#endif + +/* Defined if sbrk() is supported. */ +#define JEMALLOC_HAVE_SBRK + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ +#define JEMALLOC_CC_SILENCE + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#define JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero/quarantine/redzone). */ +#define JEMALLOC_FILL + +/* Support the experimental API. */ +#define JEMALLOC_EXPERIMENTAL + +/* Support utrace(2)-based tracing. */ +#define JEMALLOC_UTRACE + +/* Support Valgrind. 
*/ +/* #undef JEMALLOC_VALGRIND */ + +/* Support optional abort() on OOM. */ +#define JEMALLOC_XMALLOC + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#define JEMALLOC_LAZY_LOCK + +/* One page is 2^STATIC_PAGE_SHIFT bytes. */ +#define STATIC_PAGE_SHIFT 12 + +/* + * If defined, use munmap() to unmap freed chunks, rather than storing them for + * later reuse. This is automatically disabled if configuration determines + * that common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_MUNMAP + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +/* #undef JEMALLOC_IVSALLOC */ + +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. + */ +/* #undef JEMALLOC_OVERRIDE_MEMALIGN */ +#define JEMALLOC_OVERRIDE_VALLOC + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ +/* #undef JEMALLOC_ZONE_VERSION */ + +/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ +/* #undef JEMALLOC_MREMAP_FIXED */ + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. + */ +/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED */ +#define JEMALLOC_PURGE_MADVISE_FREE +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# define JEMALLOC_MADV_PURGE MADV_FREE +#else +# error "No method defined for purging unused dirty pages." +#endif + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#define LG_SIZEOF_PTR 3 + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c new file mode 100644 index 000000000000..989034d46743 --- /dev/null +++ b/contrib/jemalloc/src/arena.c @@ -0,0 +1,2248 @@ +#define JEMALLOC_ARENA_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. 
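The JEMALLOC_PURGE_MADVISE_FREE / JEMALLOC_MADV_PURGE selection above means that on FreeBSD unused dirty pages are returned to the kernel with madvise(2) rather than unmapped. A rough sketch of the system call involved, not jemalloc's actual purge path; the ex_* name is hypothetical:

#include <stddef.h>
#include <sys/mman.h>

/* Mark a run of pages as reclaimable: the kernel may discard their contents
 * instead of swapping them out, while the mapping itself stays intact. */
static int
ex_purge_pages(void *addr, size_t len)
{
	return (madvise(addr, len, MADV_FREE));
}

int
main(void)
{
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p != MAP_FAILED)
		(void)ex_purge_pages(p, 4096);
	return (0);
}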
*/ + +ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; +arena_bin_info_t arena_bin_info[NBINS]; + +JEMALLOC_ATTR(aligned(CACHELINE)) +const uint8_t small_size2bin[] = { +#define S2B_8(i) i, +#define S2B_16(i) S2B_8(i) S2B_8(i) +#define S2B_32(i) S2B_16(i) S2B_16(i) +#define S2B_64(i) S2B_32(i) S2B_32(i) +#define S2B_128(i) S2B_64(i) S2B_64(i) +#define S2B_256(i) S2B_128(i) S2B_128(i) +#define S2B_512(i) S2B_256(i) S2B_256(i) +#define S2B_1024(i) S2B_512(i) S2B_512(i) +#define S2B_2048(i) S2B_1024(i) S2B_1024(i) +#define S2B_4096(i) S2B_2048(i) S2B_2048(i) +#define S2B_8192(i) S2B_4096(i) S2B_4096(i) +#define SIZE_CLASS(bin, delta, size) \ + S2B_##delta(bin) + SIZE_CLASSES +#undef S2B_8 +#undef S2B_16 +#undef S2B_32 +#undef S2B_64 +#undef S2B_128 +#undef S2B_256 +#undef S2B_512 +#undef S2B_1024 +#undef S2B_2048 +#undef S2B_4096 +#undef S2B_8192 +#undef SIZE_CLASS +}; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, + bool large, bool zero); +static arena_chunk_t *arena_chunk_alloc(arena_t *arena); +static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); +static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, + bool zero); +static void arena_purge(arena_t *arena, bool all); +static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); +static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize); +static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); +static arena_run_t *arena_bin_runs_first(arena_bin_t *bin); +static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run); +static void arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run); +static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin); +static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); +static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); +static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin); +static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin); +static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t size); +static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); +static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, + size_t min_run_size); +static void bin_info_init(void); + +/******************************************************************************/ + +static inline int +arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +{ + uintptr_t a_mapelm = (uintptr_t)a; + uintptr_t b_mapelm = (uintptr_t)b; + + assert(a != NULL); + assert(b != NULL); + + return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm)); +} + +/* Generate red-black tree functions. 
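small_size2bin above is generated by expanding SIZE_CLASSES so that each SIZE_CLASS(bin, delta, size) contributes delta/8 copies of its bin index, one per 8-byte step of the request-size range it covers. A miniature stand-alone version covering only the first three bins of the LG_TINY_MIN == 3, LG_QUANTUM == 4 tables; it assumes the (size - 1) >> 3 indexing that upstream jemalloc's SMALL_SIZE2BIN() uses for this configuration (its definition lives in a header not shown here):

#include <assert.h>
#include <stddef.h>

/* Miniature of the table above: bins 0, 1 and 2 serve 8-, 16- and 32-byte
 * objects, so they contribute one, one and two 8-byte entries respectively. */
static const unsigned char ex_size2bin[] = {
	0,		/* request sizes  1..8  */
	1,		/* request sizes  9..16 */
	2, 2,		/* request sizes 17..32 */
};

int
main(void)
{
	size_t size = 20;

	/* A 20-byte request maps to index (20 - 1) >> 3 == 2, i.e. the
	 * 32-byte bin. */
	assert(ex_size2bin[(size - 1) >> 3] == 2);
	return (0);
}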
*/ +rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t, + u.rb_link, arena_run_comp) + +static inline int +arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) +{ + int ret; + size_t a_size = a->bits & ~PAGE_MASK; + size_t b_size = b->bits & ~PAGE_MASK; + + assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits & + CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY)); + + ret = (a_size > b_size) - (a_size < b_size); + if (ret == 0) { + uintptr_t a_mapelm, b_mapelm; + + if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY) + a_mapelm = (uintptr_t)a; + else { + /* + * Treat keys as though they are lower than anything + * else. + */ + a_mapelm = 0; + } + b_mapelm = (uintptr_t)b; + + ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); + } + + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t, + u.rb_link, arena_avail_comp) + +static inline void * +arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +{ + void *ret; + unsigned regind; + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + assert(run->nfree > 0); + assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + + regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); + ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + + (uintptr_t)(bin_info->reg_interval * regind)); + run->nfree--; + if (regind == run->nextind) + run->nextind++; + assert(regind < run->nextind); + return (ret); +} + +static inline void +arena_run_reg_dalloc(arena_run_t *run, void *ptr) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind = arena_run_regind(run, bin_info, ptr); + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + assert(run->nfree < bin_info->nregs); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % + (uintptr_t)bin_info->reg_interval == 0); + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); + /* Freeing an unallocated pointer can cause assertion failure. */ + assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + run->nfree++; +} + +static inline void +arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + size_t i; + UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); + + for (i = 0; i < PAGE / sizeof(size_t); i++) + assert(p[i] == 0); +} + +static void +arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, + bool zero) +{ + arena_chunk_t *chunk; + size_t run_ind, total_pages, need_pages, rem_pages, i; + size_t flag_dirty; + arena_avail_tree_t *runs_avail; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; + runs_avail = (flag_dirty != 0) ? 
&arena->runs_avail_dirty : + &arena->runs_avail_clean; + total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> + LG_PAGE; + assert((chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty); + need_pages = (size >> LG_PAGE); + assert(need_pages > 0); + assert(need_pages <= total_pages); + rem_pages = total_pages - need_pages; + + arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. + */ + size_t cactive_diff = CHUNK_CEILING((arena->nactive + + need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + LG_PAGE); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } + arena->nactive += need_pages; + + /* Keep track of trailing unused pages for later use. */ + if (rem_pages > 0) { + if (flag_dirty != 0) { + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << LG_PAGE) | CHUNK_MAP_DIRTY; + } else { + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << LG_PAGE) | + (chunk->map[run_ind+need_pages-map_bias].bits & + CHUNK_MAP_UNZEROED); + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << LG_PAGE) | + (chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); + } + arena_avail_tree_insert(runs_avail, + &chunk->map[run_ind+need_pages-map_bias]); + } + + /* Update dirty page accounting. */ + if (flag_dirty != 0) { + chunk->ndirty -= need_pages; + arena->ndirty -= need_pages; + } + + /* + * Update the page map separately for large vs. small runs, since it is + * possible to avoid iteration for large mallocs. + */ + if (large) { + if (zero) { + if (flag_dirty == 0) { + /* + * The run is clean, so some pages may be + * zeroed (i.e. never before touched). + */ + for (i = 0; i < need_pages; i++) { + if ((chunk->map[run_ind+i-map_bias].bits + & CHUNK_MAP_UNZEROED) != 0) { + VALGRIND_MAKE_MEM_UNDEFINED( + (void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), PAGE); + memset((void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), 0, PAGE); + } else if (config_debug) { + VALGRIND_MAKE_MEM_DEFINED( + (void *)((uintptr_t) + chunk + ((run_ind+i) << + LG_PAGE)), PAGE); + arena_chunk_validate_zeroed( + chunk, run_ind+i); + } + } + } else { + /* + * The run is dirty, so all pages must be + * zeroed. + */ + VALGRIND_MAKE_MEM_UNDEFINED((void + *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), (need_pages << LG_PAGE)); + memset((void *)((uintptr_t)chunk + (run_ind << + LG_PAGE)), 0, (need_pages << LG_PAGE)); + } + } + + /* + * Set the last element first, in case the run only contains one + * page (i.e. both statements set the same element). + */ + chunk->map[run_ind+need_pages-1-map_bias].bits = + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind-map_bias].bits = size | flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + } else { + assert(zero == false); + /* + * Propagate the dirty and unzeroed flags to the allocated + * small run, so that arena_dalloc_bin_run() has the ability to + * conditionally trim clean pages. + */ + chunk->map[run_ind-map_bias].bits = + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_ALLOCATED | flag_dirty; + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not + * actually cause an observable failure. 
+ */ + if (config_debug && flag_dirty == 0 && + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) + == 0) + arena_chunk_validate_zeroed(chunk, run_ind); + for (i = 1; i < need_pages - 1; i++) { + chunk->map[run_ind+i-map_bias].bits = (i << LG_PAGE) + | (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; + if (config_debug && flag_dirty == 0 && + (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) + arena_chunk_validate_zeroed(chunk, run_ind+i); + } + chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages + - 1) << LG_PAGE) | + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; + if (config_debug && flag_dirty == 0 && + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) { + arena_chunk_validate_zeroed(chunk, + run_ind+need_pages-1); + } + } +} + +static arena_chunk_t * +arena_chunk_alloc(arena_t *arena) +{ + arena_chunk_t *chunk; + size_t i; + + if (arena->spare != NULL) { + arena_avail_tree_t *runs_avail; + + chunk = arena->spare; + arena->spare = NULL; + + /* Insert the run into the appropriate runs_avail_* tree. */ + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; + assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); + assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) + == arena_maxclass); + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == + (chunk->map[chunk_npages-1-map_bias].bits & + CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[0]); + } else { + bool zero; + size_t unzeroed; + + zero = false; + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, + false, &zero); + malloc_mutex_lock(&arena->lock); + if (chunk == NULL) + return (NULL); + if (config_stats) + arena->stats.mapped += chunksize; + + chunk->arena = arena; + ql_elm_new(chunk, link_dirty); + chunk->dirtied = false; + + /* + * Claim that no pages are in use, since the header is merely + * overhead. + */ + chunk->ndirty = 0; + + /* + * Initialize the map to contain one maximal free untouched run. + * Mark the pages as zeroed iff chunk_alloc() returned a zeroed + * chunk. + */ + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + chunk->map[0].bits = arena_maxclass | unzeroed; + /* + * There is no need to initialize the internal page map entries + * unless the chunk is not zeroed. + */ + if (zero == false) { + for (i = map_bias+1; i < chunk_npages-1; i++) + chunk->map[i-map_bias].bits = unzeroed; + } else if (config_debug) { + for (i = map_bias+1; i < chunk_npages-1; i++) + assert(chunk->map[i-map_bias].bits == unzeroed); + } + chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | + unzeroed; + + /* Insert the run into the runs_avail_clean tree. */ + arena_avail_tree_insert(&arena->runs_avail_clean, + &chunk->map[0]); + } + + return (chunk); +} + +static void +arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) +{ + arena_avail_tree_t *runs_avail; + + /* + * Remove run from the appropriate runs_avail_* tree, so that the arena + * does not use it. 
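+	 * The chunk always becomes the arena's spare; if a spare is already
+	 * cached, the old spare is released via chunk_dealloc() instead.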
+ */ + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; + arena_avail_tree_remove(runs_avail, &chunk->map[0]); + + if (arena->spare != NULL) { + arena_chunk_t *spare = arena->spare; + + arena->spare = chunk; + if (spare->dirtied) { + ql_remove(&chunk->arena->chunks_dirty, spare, + link_dirty); + arena->ndirty -= spare->ndirty; + } + malloc_mutex_unlock(&arena->lock); + chunk_dealloc((void *)spare, chunksize, true); + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.mapped -= chunksize; + } else + arena->spare = chunk; +} + +static arena_run_t * +arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) +{ + arena_chunk_t *chunk; + arena_run_t *run; + arena_chunk_map_t *mapelm, key; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + + /* Search the arena's chunks for the lowest best fit. */ + key.bits = size | CHUNK_MAP_KEY; + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + + /* + * No usable runs. Create a new chunk from which to allocate the run. + */ + chunk = arena_chunk_alloc(arena); + if (chunk != NULL) { + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + + /* + * arena_chunk_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped arena->lock in + * arena_chunk_alloc(), so search one more time. + */ + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + LG_PAGE)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + + return (NULL); +} + +static inline void +arena_maybe_purge(arena_t *arena) +{ + + /* Enforce opt_lg_dirty_mult. 
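+	 * Purging happens only when the dirty pages not already slated for
+	 * purging exceed both chunk_npages and (nactive >> opt_lg_dirty_mult).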
*/ + if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && + (arena->ndirty - arena->npurgatory) > chunk_npages && + (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - + arena->npurgatory)) + arena_purge(arena, false); +} + +static inline void +arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) +{ + ql_head(arena_chunk_map_t) mapelms; + arena_chunk_map_t *mapelm; + size_t pageind, flag_unzeroed; + size_t ndirty; + size_t nmadvise; + + ql_new(&mapelms); + + flag_unzeroed = +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* + * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous + * mappings, but not for file-backed mappings. + */ + 0 +#else + CHUNK_MAP_UNZEROED +#endif + ; + + /* + * If chunk is the spare, temporarily re-allocate it, 1) so that its + * run is reinserted into runs_avail_dirty, and 2) so that it cannot be + * completely discarded by another thread while arena->lock is dropped + * by this thread. Note that the arena_run_dalloc() call will + * implicitly deallocate the chunk, so no explicit action is required + * in this function to deallocate the chunk. + * + * Note that once a chunk contains dirty pages, it cannot again contain + * a single run unless 1) it is a dirty run, or 2) this function purges + * dirty pages and causes the transition to a single clean run. Thus + * (chunk == arena->spare) is possible, but it is not possible for + * this function to be called on the spare unless it contains a dirty + * run. + */ + if (chunk == arena->spare) { + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); + arena_chunk_alloc(arena); + } + + /* Temporarily allocate all free dirty runs within chunk. */ + for (pageind = map_bias; pageind < chunk_npages;) { + mapelm = &chunk->map[pageind-map_bias]; + if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { + size_t npages; + + npages = mapelm->bits >> LG_PAGE; + assert(pageind + npages <= chunk_npages); + if (mapelm->bits & CHUNK_MAP_DIRTY) { + size_t i; + + arena_avail_tree_remove( + &arena->runs_avail_dirty, mapelm); + + mapelm->bits = (npages << LG_PAGE) | + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; + /* + * Update internal elements in the page map, so + * that CHUNK_MAP_UNZEROED is properly set. + */ + for (i = 1; i < npages - 1; i++) { + chunk->map[pageind+i-map_bias].bits = + flag_unzeroed; + } + if (npages > 1) { + chunk->map[ + pageind+npages-1-map_bias].bits = + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; + } + + if (config_stats) { + /* + * Update stats_cactive if nactive is + * crossing a chunk multiple. + */ + size_t cactive_diff = + CHUNK_CEILING((arena->nactive + + npages) << LG_PAGE) - + CHUNK_CEILING(arena->nactive << + LG_PAGE); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); + } + arena->nactive += npages; + /* Append to list for later processing. */ + ql_elm_new(mapelm, u.ql_link); + ql_tail_insert(&mapelms, mapelm, u.ql_link); + } + + pageind += npages; + } else { + /* Skip allocated run. 
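+				 * Large runs encode their page count in the map
+				 * bits; small runs require a bin_info lookup to
+				 * find the run size.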
*/ + if (mapelm->bits & CHUNK_MAP_LARGE) + pageind += mapelm->bits >> LG_PAGE; + else { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << LG_PAGE)); + + assert((mapelm->bits >> LG_PAGE) == 0); + size_t binind = arena_bin_index(arena, + run->bin); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + pageind += bin_info->run_size >> LG_PAGE; + } + } + } + assert(pageind == chunk_npages); + + if (config_debug) + ndirty = chunk->ndirty; + if (config_stats) + arena->stats.purged += chunk->ndirty; + arena->ndirty -= chunk->ndirty; + chunk->ndirty = 0; + ql_remove(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = false; + + malloc_mutex_unlock(&arena->lock); + if (config_stats) + nmadvise = 0; + ql_foreach(mapelm, &mapelms, u.ql_link) { + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + size_t npages = mapelm->bits >> LG_PAGE; + + assert(pageind + npages <= chunk_npages); + assert(ndirty >= npages); + if (config_debug) + ndirty -= npages; + + madvise((void *)((uintptr_t)chunk + (pageind << LG_PAGE)), + (npages << LG_PAGE), JEMALLOC_MADV_PURGE); + if (config_stats) + nmadvise++; + } + assert(ndirty == 0); + malloc_mutex_lock(&arena->lock); + if (config_stats) + arena->stats.nmadvise += nmadvise; + + /* Deallocate runs. */ + for (mapelm = ql_first(&mapelms); mapelm != NULL; + mapelm = ql_first(&mapelms)) { + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)(pageind << LG_PAGE)); + + ql_remove(&mapelms, mapelm, u.ql_link); + arena_run_dalloc(arena, run, false); + } +} + +static void +arena_purge(arena_t *arena, bool all) +{ + arena_chunk_t *chunk; + size_t npurgatory; + if (config_debug) { + size_t ndirty = 0; + + ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { + assert(chunk->dirtied); + ndirty += chunk->ndirty; + } + assert(ndirty == arena->ndirty); + } + assert(arena->ndirty > arena->npurgatory || all); + assert(arena->ndirty - arena->npurgatory > chunk_npages || all); + assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - + arena->npurgatory) || all); + + if (config_stats) + arena->stats.npurge++; + + /* + * Compute the minimum number of pages that this thread should try to + * purge, and add the result to arena->npurgatory. This will keep + * multiple threads from racing to reduce ndirty below the threshold. + */ + npurgatory = arena->ndirty - arena->npurgatory; + if (all == false) { + assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); + npurgatory -= arena->nactive >> opt_lg_dirty_mult; + } + arena->npurgatory += npurgatory; + + while (npurgatory > 0) { + /* Get next chunk with dirty pages. */ + chunk = ql_first(&arena->chunks_dirty); + if (chunk == NULL) { + /* + * This thread was unable to purge as many pages as + * originally intended, due to races with other threads + * that either did some of the purging work, or re-used + * dirty pages. + */ + arena->npurgatory -= npurgatory; + return; + } + while (chunk->ndirty == 0) { + ql_remove(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = false; + chunk = ql_first(&arena->chunks_dirty); + if (chunk == NULL) { + /* Same logic as for above. 
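+				 * No dirty chunks remain, so this thread gives
+				 * back its unused purge commitment and returns.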
*/ + arena->npurgatory -= npurgatory; + return; + } + } + + if (chunk->ndirty > npurgatory) { + /* + * This thread will, at a minimum, purge all the dirty + * pages in chunk, so set npurgatory to reflect this + * thread's commitment to purge the pages. This tends + * to reduce the chances of the following scenario: + * + * 1) This thread sets arena->npurgatory such that + * (arena->ndirty - arena->npurgatory) is at the + * threshold. + * 2) This thread drops arena->lock. + * 3) Another thread causes one or more pages to be + * dirtied, and immediately determines that it must + * purge dirty pages. + * + * If this scenario *does* play out, that's okay, + * because all of the purging work being done really + * needs to happen. + */ + arena->npurgatory += chunk->ndirty - npurgatory; + npurgatory = chunk->ndirty; + } + + arena->npurgatory -= chunk->ndirty; + npurgatory -= chunk->ndirty; + arena_chunk_purge(arena, chunk); + } +} + +void +arena_purge_all(arena_t *arena) +{ + + malloc_mutex_lock(&arena->lock); + arena_purge(arena, true); + malloc_mutex_unlock(&arena->lock); +} + +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) +{ + arena_chunk_t *chunk; + size_t size, run_ind, run_pages, flag_dirty; + arena_avail_tree_t *runs_avail; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + assert(run_ind >= map_bias); + assert(run_ind < chunk_npages); + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { + size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + assert(size == PAGE || + (chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[run_ind+(size>>LG_PAGE)-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } + run_pages = (size >> LG_PAGE); + if (config_stats) { + /* + * Update stats_cactive if nactive is crossing a chunk + * multiple. + */ + size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) - + CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE); + if (cactive_diff != 0) + stats_cactive_sub(cactive_diff); + } + arena->nactive -= run_pages; + + /* + * The run is dirty if the caller claims to have dirtied it, as well as + * if it was already dirty before being allocated. + */ + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) + dirty = true; + flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; + runs_avail = dirty ? &arena->runs_avail_dirty : + &arena->runs_avail_clean; + + /* Mark pages as unallocated in the chunk map. */ + if (dirty) { + chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + CHUNK_MAP_DIRTY; + + chunk->ndirty += run_pages; + arena->ndirty += run_pages; + } else { + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); + } + + /* Try to coalesce forward. 
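+	 * The successor run can be merged only if it is unallocated and has
+	 * the same dirty state as the run being deallocated.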
*/ + if (run_ind + run_pages < chunk_npages && + (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty) { + size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & + ~PAGE_MASK; + size_t nrun_pages = nrun_size >> LG_PAGE; + + /* + * Remove successor from runs_avail; the coalesced run is + * inserted later. + */ + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & ~PAGE_MASK) == nrun_size); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_DIRTY) == flag_dirty); + arena_avail_tree_remove(runs_avail, + &chunk->map[run_ind+run_pages-map_bias]); + + size += nrun_size; + run_pages += nrun_pages; + + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + } + + /* Try to coalesce backward. */ + if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty) { + size_t prun_size = chunk->map[run_ind-1-map_bias].bits & + ~PAGE_MASK; + size_t prun_pages = prun_size >> LG_PAGE; + + run_ind -= prun_pages; + + /* + * Remove predecessor from runs_avail; the coalesced run is + * inserted later. + */ + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) + == prun_size); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) + == flag_dirty); + arena_avail_tree_remove(runs_avail, + &chunk->map[run_ind-map_bias]); + + size += prun_size; + run_pages += prun_pages; + + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + } + + /* Insert into runs_avail, now that coalescing is complete. */ + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + + if (dirty) { + /* + * Insert into chunks_dirty before potentially calling + * arena_chunk_dealloc(), so that chunks_dirty and + * arena->ndirty are consistent. + */ + if (chunk->dirtied == false) { + ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = true; + } + } + + /* + * Deallocate chunk if it is now completely unused. The bit + * manipulation checks whether the first run is unallocated and extends + * to the end of the chunk. + */ + if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == + arena_maxclass) + arena_chunk_dealloc(arena, chunk); + + /* + * It is okay to do dirty page processing here even if the chunk was + * deallocated above, since in that case it is the spare. Waiting + * until after possible chunk deallocation to do dirty processing + * allows for an old spare to be fully deallocated, thus decreasing the + * chances of spuriously crossing the dirty page purging threshold. 
+ */ + if (dirty) + arena_maybe_purge(arena); +} + +static void +arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize) +{ + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = (oldsize - newsize) >> LG_PAGE; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; + + assert(oldsize > newsize); + + /* + * Update the chunk map so that arena_run_dalloc() can treat the + * leading run as separately allocated. Set the last element of each + * run first, in case of single-page runs. + */ + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = (oldsize - newsize) + | flag_dirty | (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + if (config_debug) { + UNUSED size_t tail_npages = newsize >> LG_PAGE; + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_DIRTY) == flag_dirty); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_ALLOCATED) != 0); + } + chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + arena_run_dalloc(arena, run, false); +} + +static void +arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize, bool dirty) +{ + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + size_t head_npages = newsize >> LG_PAGE; + size_t tail_npages = (oldsize - newsize) >> LG_PAGE; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY; + + assert(oldsize > newsize); + + /* + * Update the chunk map so that arena_run_dalloc() can treat the + * trailing run as separately allocated. Set the last element of each + * run first, in case of single-page runs. 
+ */ + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = + flag_dirty | + (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | + flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), + dirty); +} + +static arena_run_t * +arena_bin_runs_first(arena_bin_t *bin) +{ + arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs); + if (mapelm != NULL) { + arena_chunk_t *chunk; + size_t pageind; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); + return (run); + } + + return (NULL); +} + +static void +arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, mapelm) == NULL); + + arena_run_tree_insert(&bin->runs, mapelm); +} + +static void +arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, mapelm) != NULL); + + arena_run_tree_remove(&bin->runs, mapelm); +} + +static arena_run_t * +arena_bin_nonfull_run_tryget(arena_bin_t *bin) +{ + arena_run_t *run = arena_bin_runs_first(bin); + if (run != NULL) { + arena_bin_runs_remove(bin, run); + if (config_stats) + bin->stats.reruns++; + } + return (run); +} + +static arena_run_t * +arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) +{ + arena_run_t *run; + size_t binind; + arena_bin_info_t *bin_info; + + /* Look for a usable run. */ + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) + return (run); + /* No existing runs have any space available. */ + + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + + /* Allocate a new run. */ + malloc_mutex_unlock(&bin->lock); + /******************************/ + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc(arena, bin_info->run_size, false, false); + if (run != NULL) { + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + /* Initialize run internals. 
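+		 * The region bitmap lives inside the run at
+		 * bin_info->bitmap_offset and is initialized with every
+		 * region marked available.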
*/ + run->bin = bin; + run->nextind = 0; + run->nfree = bin_info->nregs; + bitmap_init(bitmap, &bin_info->bitmap_info); + } + malloc_mutex_unlock(&arena->lock); + /********************************/ + malloc_mutex_lock(&bin->lock); + if (run != NULL) { + if (config_stats) { + bin->stats.nruns++; + bin->stats.curruns++; + } + return (run); + } + + /* + * arena_run_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped bin->lock above, + * so search one more time. + */ + run = arena_bin_nonfull_run_tryget(bin); + if (run != NULL) + return (run); + + return (NULL); +} + +/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ +static void * +arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) +{ + void *ret; + size_t binind; + arena_bin_info_t *bin_info; + arena_run_t *run; + + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + bin->runcur = NULL; + run = arena_bin_nonfull_run_get(arena, bin); + if (bin->runcur != NULL && bin->runcur->nfree > 0) { + /* + * Another thread updated runcur while this one ran without the + * bin lock in arena_bin_nonfull_run_get(). + */ + assert(bin->runcur->nfree > 0); + ret = arena_run_reg_alloc(bin->runcur, bin_info); + if (run != NULL) { + arena_chunk_t *chunk; + + /* + * arena_run_alloc() may have allocated run, or it may + * have pulled run from the bin's run tree. Therefore + * it is unsafe to make any assumptions about how run + * has previously been used, and arena_bin_lower_run() + * must be called, as if a region were just deallocated + * from the run. + */ + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + if (run->nfree == bin_info->nregs) + arena_dalloc_bin_run(arena, chunk, run, bin); + else + arena_bin_lower_run(arena, chunk, run, bin); + } + return (ret); + } + + if (run == NULL) + return (NULL); + + bin->runcur = run; + + assert(bin->runcur->nfree > 0); + + return (arena_run_reg_alloc(bin->runcur, bin_info)); +} + +void +arena_prof_accum(arena_t *arena, uint64_t accumbytes) +{ + + if (prof_interval != 0) { + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + prof_idump(); + arena->prof_accumbytes -= prof_interval; + } + } +} + +void +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind, + uint64_t prof_accumbytes) +{ + unsigned i, nfill; + arena_bin_t *bin; + arena_run_t *run; + void *ptr; + + assert(tbin->ncached == 0); + + if (config_prof) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, prof_accumbytes); + malloc_mutex_unlock(&arena->lock); + } + bin = &arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> + tbin->lg_fill_div); i < nfill; i++) { + if ((run = bin->runcur) != NULL && run->nfree > 0) + ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); + else + ptr = arena_bin_malloc_hard(arena, bin); + if (ptr == NULL) + break; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, &arena_bin_info[binind], + true); + } + /* Insert such that low regions get used first. 
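+		 * arena_run_reg_alloc() hands out the lowest free region
+		 * first, so filling avail[] from the back leaves the lowest
+		 * addresses at the top of the stack.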
*/ + tbin->avail[nfill - 1 - i] = ptr; + } + if (config_stats) { + bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.nmalloc += i; + bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nfills++; + tbin->tstats.nrequests = 0; + } + malloc_mutex_unlock(&bin->lock); + tbin->ncached = i; +} + +void +arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) +{ + + if (zero) { + size_t redzone_size = bin_info->redzone_size; + memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, + redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, + redzone_size); + } else { + memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, + bin_info->reg_interval); + } +} + +void +arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +{ + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + bool error = false; + + for (i = 1; i <= redzone_size; i++) { + unsigned byte; + if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) { + error = true; + malloc_printf(": Corrupt redzone " + "%zu byte%s before %p (size %zu), byte=%#x\n", i, + (i == 1) ? "" : "s", ptr, size, byte); + } + } + for (i = 0; i < redzone_size; i++) { + unsigned byte; + if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) { + error = true; + malloc_printf(": Corrupt redzone " + "%zu byte%s after end of %p (size %zu), byte=%#x\n", + i, (i == 1) ? "" : "s", ptr, size, byte); + } + } + if (opt_abort && error) + abort(); + + memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + bin_info->reg_interval); +} + +void * +arena_malloc_small(arena_t *arena, size_t size, bool zero) +{ + void *ret; + arena_bin_t *bin; + arena_run_t *run; + size_t binind; + + binind = SMALL_SIZE2BIN(size); + assert(binind < NBINS); + bin = &arena->bins[binind]; + size = arena_bin_info[binind].reg_size; + + malloc_mutex_lock(&bin->lock); + if ((run = bin->runcur) != NULL && run->nfree > 0) + ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + else + ret = arena_bin_malloc_hard(arena, bin); + + if (ret == NULL) { + malloc_mutex_unlock(&bin->lock); + return (NULL); + } + + if (config_stats) { + bin->stats.allocated += size; + bin->stats.nmalloc++; + bin->stats.nrequests++; + } + malloc_mutex_unlock(&bin->lock); + if (config_prof && isthreaded == false) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, size); + malloc_mutex_unlock(&arena->lock); + } + + if (zero == false) { + if (config_fill) { + if (opt_junk) { + arena_alloc_junk_small(ret, + &arena_bin_info[binind], false); + } else if (opt_zero) + memset(ret, 0, size); + } + } else { + if (config_fill && opt_junk) { + arena_alloc_junk_small(ret, &arena_bin_info[binind], + true); + } + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } + + return (ret); +} + +void * +arena_malloc_large(arena_t *arena, size_t size, bool zero) +{ + void *ret; + + /* Large allocation. 
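+	 * The request is rounded up to a page multiple and satisfied with a
+	 * run taken directly from a chunk, bypassing the bins entirely.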
*/ + size = PAGE_CEILING(size); + malloc_mutex_lock(&arena->lock); + ret = (void *)arena_run_alloc(arena, size, true, zero); + if (ret == NULL) { + malloc_mutex_unlock(&arena->lock); + return (NULL); + } + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + if (config_prof) + arena_prof_accum(arena, size); + malloc_mutex_unlock(&arena->lock); + + if (zero == false) { + if (config_fill) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + } + + return (ret); +} + +/* Only handles large allocations that require more than page alignment. */ +void * +arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) +{ + void *ret; + size_t alloc_size, leadsize, trailsize; + arena_run_t *run; + arena_chunk_t *chunk; + + assert((size & PAGE_MASK) == 0); + + alignment = PAGE_CEILING(alignment); + alloc_size = size + alignment - PAGE; + + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc(arena, alloc_size, true, zero); + if (run == NULL) { + malloc_mutex_unlock(&arena->lock); + return (NULL); + } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + + leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - + (uintptr_t)run; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)run + leadsize); + if (leadsize != 0) { + arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - + leadsize); + } + if (trailsize != 0) { + arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, + false); + } + + if (config_stats) { + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + malloc_mutex_unlock(&arena->lock); + + if (config_fill && zero == false) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } + return (ret); +} + +/* Return the size of the allocation pointed to by ptr. 
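+ * If demote is true and profiling promoted a small request to a full page,
+ * the original small size class is reported instead of the page size.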
*/ +size_t +arena_salloc(const void *ptr, bool demote) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> LG_PAGE)) << LG_PAGE)); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + == 0); + ret = bin_info->reg_size; + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = mapbits & ~PAGE_MASK; + if (demote && prof_promote && ret == PAGE && (mapbits & + CHUNK_MAP_CLASS_MASK) != 0) { + size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> + CHUNK_MAP_CLASS_SHIFT) - 1; + assert(binind < NBINS); + ret = arena_bin_info[binind].reg_size; + } + assert(ret != 0); + } + + return (ret); +} + +void +arena_prof_promoted(const void *ptr, size_t size) +{ + arena_chunk_t *chunk; + size_t pageind, binind; + + assert(config_prof); + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == PAGE); + assert(size <= SMALL_MAXCLASS); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + binind = SMALL_SIZE2BIN(size); + assert(binind < NBINS); + chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & + ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); + + assert(isalloc(ptr, false) == PAGE); + assert(isalloc(ptr, true) == size); +} + +static void +arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* Dissociate run from bin. */ + if (run == bin->runcur) + bin->runcur = NULL; + else { + size_t binind = arena_bin_index(chunk->arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + + if (bin_info->nregs != 1) { + /* + * This block's conditional is necessary because if the + * run only contains one region, then it never gets + * inserted into the non-full runs tree. + */ + arena_bin_runs_remove(bin, run); + } + } +} + +static void +arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + size_t binind; + arena_bin_info_t *bin_info; + size_t npages, run_ind, past; + + assert(run != bin->runcur); + assert(arena_run_tree_search(&bin->runs, &chunk->map[ + (((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE)-map_bias]) == NULL); + + binind = arena_bin_index(chunk->arena, run->bin); + bin_info = &arena_bin_info[binind]; + + malloc_mutex_unlock(&bin->lock); + /******************************/ + npages = bin_info->run_size >> LG_PAGE; + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE); + past = (size_t)(PAGE_CEILING((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * + bin_info->reg_interval - bin_info->redzone_size) - + (uintptr_t)chunk) >> LG_PAGE); + malloc_mutex_lock(&arena->lock); + + /* + * If the run was originally clean, and some pages were never touched, + * trim the clean pages before deallocating the dirty portion of the + * run. 
+ */ + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past + - run_ind < npages) { + /* + * Trim clean pages. Convert to large run beforehand. Set the + * last map element first, in case this is a one-page run. + */ + chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | + (chunk->map[run_ind+npages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind-map_bias].bits = bin_info->run_size | + CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE), + ((past - run_ind) << LG_PAGE), false); + /* npages = past - run_ind; */ + } + arena_run_dalloc(arena, run, true); + malloc_mutex_unlock(&arena->lock); + /****************************/ + malloc_mutex_lock(&bin->lock); + if (config_stats) + bin->stats.curruns--; +} + +static void +arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* + * Make sure that if bin->runcur is non-NULL, it refers to the lowest + * non-full run. It is okay to NULL runcur out rather than proactively + * keeping it pointing at the lowest non-full run. + */ + if ((uintptr_t)run < (uintptr_t)bin->runcur) { + /* Switch runcur. */ + if (bin->runcur->nfree > 0) + arena_bin_runs_insert(bin, bin->runcur); + bin->runcur = run; + if (config_stats) + bin->stats.reruns++; + } else + arena_bin_runs_insert(bin, run); +} + +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm) +{ + size_t pageind; + arena_run_t *run; + arena_bin_t *bin; + size_t size; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (mapelm->bits >> LG_PAGE)) << LG_PAGE)); + bin = run->bin; + size_t binind = arena_bin_index(arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + if (config_fill || config_stats) + size = bin_info->reg_size; + + if (config_fill && opt_junk) + arena_dalloc_junk_small(ptr, bin_info); + + arena_run_reg_dalloc(run, ptr); + if (run->nfree == bin_info->nregs) { + arena_dissociate_bin_run(chunk, run, bin); + arena_dalloc_bin_run(arena, chunk, run, bin); + } else if (run->nfree == 1 && run != bin->runcur) + arena_bin_lower_run(arena, chunk, run, bin); + + if (config_stats) { + bin->stats.allocated -= size; + bin->stats.ndalloc++; + } +} + +void +arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) +{ + unsigned i; + + malloc_mutex_lock(&arena->lock); + *nactive += arena->nactive; + *ndirty += arena->ndirty; + + astats->mapped += arena->stats.mapped; + astats->npurge += arena->stats.npurge; + astats->nmadvise += arena->stats.nmadvise; + astats->purged += arena->stats.purged; + astats->allocated_large += arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].curruns += arena->stats.lstats[i].curruns; + } + malloc_mutex_unlock(&arena->lock); + + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(&bin->lock); + bstats[i].allocated += bin->stats.allocated; + bstats[i].nmalloc += 
bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } + bstats[i].nruns += bin->stats.nruns; + bstats[i].reruns += bin->stats.reruns; + bstats[i].curruns += bin->stats.curruns; + malloc_mutex_unlock(&bin->lock); + } +} + +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + if (config_fill || config_stats) { + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; + + if (config_fill && config_stats && opt_junk) + memset(ptr, 0x5a, size); + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= size; + arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--; + } + } + + arena_run_dalloc(arena, (arena_run_t *)ptr, true); +} + +static void +arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size) +{ + + assert(size < oldsize); + + /* + * Shrink the run, and make trailing pages available for other + * allocations. + */ + malloc_mutex_lock(&arena->lock); + arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, + true); + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + malloc_mutex_unlock(&arena->lock); +} + +static bool +arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size, size_t extra, bool zero) +{ + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t npages = oldsize >> LG_PAGE; + size_t followsize; + + assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); + + /* Try to extend the run. */ + assert(size + extra > oldsize); + malloc_mutex_lock(&arena->lock); + if (pageind + npages < chunk_npages && + (chunk->map[pageind+npages-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0 && (followsize = + chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - + oldsize) { + /* + * The next run is available and sufficiently large. Split the + * following run, then merge the first part with the existing + * allocation. + */ + size_t flag_dirty; + size_t splitsize = (oldsize + followsize <= size + extra) + ? followsize : size + extra - oldsize; + arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + + ((pageind+npages) << LG_PAGE)), splitsize, true, zero); + + size = oldsize + splitsize; + npages = size >> LG_PAGE; + + /* + * Mark the extended run as dirty if either portion of the run + * was dirty before allocation. This is rather pedantic, + * because there's not actually any sequence of events that + * could cause the resulting run to be passed to + * arena_run_dalloc() with the dirty argument set to false + * (which is when dirty flag consistency would really matter). 
+ */ + flag_dirty = (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY) | + (chunk->map[pageind+npages-1-map_bias].bits & + CHUNK_MAP_DIRTY); + chunk->map[pageind-map_bias].bits = size | flag_dirty + | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + if (config_stats) { + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> LG_PAGE) + - 1].ndalloc++; + arena->stats.lstats[(oldsize >> LG_PAGE) + - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++; + arena->stats.lstats[(size >> LG_PAGE) + - 1].nrequests++; + arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++; + } + malloc_mutex_unlock(&arena->lock); + return (false); + } + malloc_mutex_unlock(&arena->lock); + + return (true); +} + +/* + * Try to resize a large allocation, in order to avoid copying. This will + * always fail if growing an object, and the following run is already in use. + */ +static bool +arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) +{ + size_t psize; + + psize = PAGE_CEILING(size + extra); + if (psize == oldsize) { + /* Same size class. */ + if (config_fill && opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - + size); + } + return (false); + } else { + arena_chunk_t *chunk; + arena_t *arena; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + + if (psize < oldsize) { + /* Fill before shrinking in order avoid a race. */ + if (config_fill && opt_junk) { + memset((void *)((uintptr_t)ptr + size), 0x5a, + oldsize - size); + } + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, + psize); + return (false); + } else { + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, + oldsize, PAGE_CEILING(size), + psize - PAGE_CEILING(size), zero); + if (config_fill && ret == false && zero == false && + opt_zero) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + size - oldsize); + } + return (ret); + } + } +} + +void * +arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) +{ + + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize <= arena_maxclass) { + if (oldsize <= SMALL_MAXCLASS) { + assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size + == oldsize); + if ((size + extra <= SMALL_MAXCLASS && + SMALL_SIZE2BIN(size + extra) == + SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + size + extra >= oldsize)) { + if (config_fill && opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), + 0x5a, oldsize - size); + } + return (ptr); + } + } else { + assert(size <= arena_maxclass); + if (size + extra > SMALL_MAXCLASS) { + if (arena_ralloc_large(ptr, oldsize, size, + extra, zero) == false) + return (ptr); + } + } + } + + /* Reallocation would require a move. */ + return (NULL); +} + +void * +arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero, bool try_tcache) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); + if (ret != NULL) + return (ret); + + /* + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and + * copying. 
+ */ + if (alignment != 0) { + size_t usize = sa2u(size + extra, alignment); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + } else + ret = arena_malloc(NULL, size + extra, zero, try_tcache); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) { + size_t usize = sa2u(size, alignment); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + } else + ret = arena_malloc(NULL, size, zero, try_tcache); + + if (ret == NULL) + return (NULL); + } + + /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(ret, ptr, copysize); + iqalloc(ptr); + return (ret); +} + +bool +arena_new(arena_t *arena, unsigned ind) +{ + unsigned i; + arena_bin_t *bin; + + arena->ind = ind; + arena->nthreads = 0; + + if (malloc_mutex_init(&arena->lock)) + return (true); + + if (config_stats) { + memset(&arena->stats, 0, sizeof(arena_stats_t)); + arena->stats.lstats = + (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (arena->stats.lstats == NULL) + return (true); + memset(arena->stats.lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + if (config_tcache) + ql_new(&arena->tcache_ql); + } + + if (config_prof) + arena->prof_accumbytes = 0; + + /* Initialize chunks. */ + ql_new(&arena->chunks_dirty); + arena->spare = NULL; + + arena->nactive = 0; + arena->ndirty = 0; + arena->npurgatory = 0; + + arena_avail_tree_new(&arena->runs_avail_clean); + arena_avail_tree_new(&arena->runs_avail_dirty); + + /* Initialize bins. */ + for (i = 0; i < NBINS; i++) { + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) + return (true); + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); + if (config_stats) + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + } + + return (false); +} + +/* + * Calculate bin_info->run_size such that it meets the following constraints: + * + * *) bin_info->run_size >= min_run_size + * *) bin_info->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). + * *) bin_info->nregs <= RUN_MAXREGS + * + * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also + * calculated here, since these settings are all interdependent. + */ +static size_t +bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +{ + size_t pad_size; + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; + uint32_t try_bitmap_offset, good_bitmap_offset; + uint32_t try_ctx0_offset, good_ctx0_offset; + uint32_t try_redzone0_offset, good_redzone0_offset; + + assert(min_run_size >= PAGE); + assert(min_run_size <= arena_maxclass); + + /* + * Determine redzone size based on minimum alignment and minimum + * redzone size. Add padding to the end of the run if it is needed to + * align the regions. The padding allows each redzone to be half the + * minimum alignment; without the padding, each redzone would have to + * be twice as large in order to maintain alignment. 
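+	 * For example, a 32-byte size class has a minimum alignment of 32,
+	 * so (assuming REDZONE_MINSIZE is 16) each redzone is 16 bytes and
+	 * reg_interval becomes 64.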
+ */ + if (config_fill && opt_redzone) { + size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1); + if (align_min <= REDZONE_MINSIZE) { + bin_info->redzone_size = REDZONE_MINSIZE; + pad_size = 0; + } else { + bin_info->redzone_size = align_min >> 1; + pad_size = bin_info->redzone_size; + } + } else { + bin_info->redzone_size = 0; + pad_size = 0; + } + bin_info->reg_interval = bin_info->reg_size + + (bin_info->redzone_size << 1); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_interval) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); + if (config_prof && opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. + */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; + good_bitmap_offset = try_bitmap_offset; + good_ctx0_offset = try_ctx0_offset; + good_redzone0_offset = try_redzone0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE; + try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) / + bin_info->reg_interval) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); + if (config_prof && opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } + try_redzone0_offset = try_run_size - (try_nregs * + bin_info->reg_interval) - pad_size; + } while (try_hdr_size > try_redzone0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass + && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) > + RUN_MAX_OVRHD_RELAX + && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_nregs < RUN_MAXREGS); + + assert(good_hdr_size <= good_redzone0_offset); + + /* Copy final settings. 
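+	 * reg0_offset points just past the first region's leading redzone,
+	 * i.e. at the first usable byte of region 0.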
*/ + bin_info->run_size = good_run_size; + bin_info->nregs = good_nregs; + bin_info->bitmap_offset = good_bitmap_offset; + bin_info->ctx0_offset = good_ctx0_offset; + bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size; + + assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs + * bin_info->reg_interval) + pad_size == bin_info->run_size); + + return (good_run_size); +} + +static void +bin_info_init(void) +{ + arena_bin_info_t *bin_info; + size_t prev_run_size = PAGE; + +#define SIZE_CLASS(bin, delta, size) \ + bin_info = &arena_bin_info[bin]; \ + bin_info->reg_size = size; \ + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + SIZE_CLASSES +#undef SIZE_CLASS +} + +void +arena_boot(void) +{ + size_t header_size; + unsigned i; + + /* + * Compute the header size such that it is large enough to contain the + * page map. The page map is biased to omit entries for the header + * itself, so some iteration is necessary to compute the map bias. + * + * 1) Compute safe header_size and map_bias values that include enough + * space for an unbiased page map. + * 2) Refine map_bias based on (1) to omit the header pages in the page + * map. The resulting map_bias may be one too small. + * 3) Refine map_bias based on (2). The result will be >= the result + * from (2), and will always be correct. + */ + map_bias = 0; + for (i = 0; i < 3; i++) { + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK) + != 0); + } + assert(map_bias > 0); + + arena_maxclass = chunksize - (map_bias << LG_PAGE); + + bin_info_init(); +} + +void +arena_prefork(arena_t *arena) +{ + unsigned i; + + malloc_mutex_prefork(&arena->lock); + for (i = 0; i < NBINS; i++) + malloc_mutex_prefork(&arena->bins[i].lock); +} + +void +arena_postfork_parent(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_parent(&arena->bins[i].lock); + malloc_mutex_postfork_parent(&arena->lock); +} + +void +arena_postfork_child(arena_t *arena) +{ + unsigned i; + + for (i = 0; i < NBINS; i++) + malloc_mutex_postfork_child(&arena->bins[i].lock); + malloc_mutex_postfork_child(&arena->lock); +} diff --git a/contrib/jemalloc/src/atomic.c b/contrib/jemalloc/src/atomic.c new file mode 100644 index 000000000000..77ee313113be --- /dev/null +++ b/contrib/jemalloc/src/atomic.c @@ -0,0 +1,2 @@ +#define JEMALLOC_ATOMIC_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/contrib/jemalloc/src/base.c b/contrib/jemalloc/src/base.c new file mode 100644 index 000000000000..bafaa7438016 --- /dev/null +++ b/contrib/jemalloc/src/base.c @@ -0,0 +1,138 @@ +#define JEMALLOC_BASE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +static malloc_mutex_t base_mtx; + +/* + * Current pages that are being used for internal memory allocations. These + * pages are carved up in cacheline-size quanta, so that there is no chance of + * false cache line sharing. + */ +static void *base_pages; +static void *base_next_addr; +static void *base_past_addr; /* Addr immediately past base_pages. */ +static extent_node_t *base_nodes; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static bool base_pages_alloc(size_t minsize); + +/******************************************************************************/ + +static bool +base_pages_alloc(size_t minsize) +{ + size_t csize; + bool zero; + + assert(minsize != 0); + csize = CHUNK_CEILING(minsize); + zero = false; + base_pages = chunk_alloc(csize, chunksize, true, &zero); + if (base_pages == NULL) + return (true); + base_next_addr = base_pages; + base_past_addr = (void *)((uintptr_t)base_pages + csize); + + return (false); +} + +void * +base_alloc(size_t size) +{ + void *ret; + size_t csize; + + /* Round size up to nearest multiple of the cacheline size. */ + csize = CACHELINE_CEILING(size); + + malloc_mutex_lock(&base_mtx); + /* Make sure there's enough space for the allocation. */ + if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { + if (base_pages_alloc(csize)) { + malloc_mutex_unlock(&base_mtx); + return (NULL); + } + } + /* Allocate. */ + ret = base_next_addr; + base_next_addr = (void *)((uintptr_t)base_next_addr + csize); + malloc_mutex_unlock(&base_mtx); + + return (ret); +} + +void * +base_calloc(size_t number, size_t size) +{ + void *ret = base_alloc(number * size); + + if (ret != NULL) + memset(ret, 0, number * size); + + return (ret); +} + +extent_node_t * +base_node_alloc(void) +{ + extent_node_t *ret; + + malloc_mutex_lock(&base_mtx); + if (base_nodes != NULL) { + ret = base_nodes; + base_nodes = *(extent_node_t **)ret; + malloc_mutex_unlock(&base_mtx); + } else { + malloc_mutex_unlock(&base_mtx); + ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); + } + + return (ret); +} + +void +base_node_dealloc(extent_node_t *node) +{ + + malloc_mutex_lock(&base_mtx); + *(extent_node_t **)node = base_nodes; + base_nodes = node; + malloc_mutex_unlock(&base_mtx); +} + +bool +base_boot(void) +{ + + base_nodes = NULL; + if (malloc_mutex_init(&base_mtx)) + return (true); + + return (false); +} + +void +base_prefork(void) +{ + + malloc_mutex_prefork(&base_mtx); +} + +void +base_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&base_mtx); +} + +void +base_postfork_child(void) +{ + + malloc_mutex_postfork_child(&base_mtx); +} diff --git a/contrib/jemalloc/src/bitmap.c b/contrib/jemalloc/src/bitmap.c new file mode 100644 index 000000000000..b47e2629093f --- /dev/null +++ b/contrib/jemalloc/src/bitmap.c @@ -0,0 +1,90 @@ +#define JEMALLOC_BITMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static size_t bits2groups(size_t nbits); + +/******************************************************************************/ + +static size_t +bits2groups(size_t nbits) +{ + + return ((nbits >> LG_BITMAP_GROUP_NBITS) + + !!(nbits & BITMAP_GROUP_NBITS_MASK)); +} + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. 
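The level layout described here is easiest to see numerically: level 0 stores one bit per object, and each level above it stores one summary bit per group of the level below, until a level fits in a single group. The sketch below recomputes the group offsets for an arbitrary bit count; the word-sized group and the names are illustrative rather than the header's actual constants.

#include <stdio.h>
#include <stddef.h>

#define GROUP_BITS (sizeof(unsigned long) * 8)

static size_t
bits2groups(size_t nbits)
{
    return (nbits + GROUP_BITS - 1) / GROUP_BITS;   // ceiling division
}

int
main(void)
{
    size_t nbits = 1000, offset = 0, groups = bits2groups(nbits);
    unsigned level = 0;

    for (;;) {
        printf("level %u: offset %zu, %zu group(s)\n", level, offset, groups);
        if (groups == 1)
            break;                       // top of the tree reached
        offset += groups;
        groups = bits2groups(groups);    // one summary bit per group below
        level++;
    }
    printf("total groups needed: %zu\n", offset + groups);
    return 0;
}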
+ */ + binfo->levels[0].group_offset = 0; + group_count = bits2groups(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = bits2groups(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + binfo->nlevels = i; + binfo->nbits = nbits; +} + +size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); +} + +size_t +bitmap_size(size_t nbits) +{ + bitmap_info_t binfo; + + bitmap_info_init(&binfo, nbits); + return (bitmap_info_ngroups(&binfo)); +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface, so the bitmap starts out with all 1 bits, except for + * trailing unused bits (if any). Note that each group uses bit 0 to + * correspond to the first logical bit in the group, so extra bits + * are the most significant bits of the last group. + */ + memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << + LG_SIZEOF_BITMAP); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[1].group_offset - 1] >>= extra; + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } +} diff --git a/contrib/jemalloc/src/chunk.c b/contrib/jemalloc/src/chunk.c new file mode 100644 index 000000000000..67e0d503289f --- /dev/null +++ b/contrib/jemalloc/src/chunk.c @@ -0,0 +1,304 @@ +#define JEMALLOC_CHUNK_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +size_t opt_lg_chunk = LG_CHUNK_DEFAULT; + +malloc_mutex_t chunks_mtx; +chunk_stats_t stats_chunks; + +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t chunks_szad; +static extent_tree_t chunks_ad; + +rtree_t *chunks_rtree; + +/* Various chunk-related settings. */ +size_t chunksize; +size_t chunksize_mask; /* (chunksize - 1). */ +size_t chunk_npages; +size_t map_bias; +size_t arena_maxclass; /* Max size class for arenas. */ + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle(size_t size, size_t alignment, bool *zero); +static void chunk_record(void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle(size_t size, size_t alignment, bool *zero) +{ + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; + + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. 
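The reason chunk_recycle() asks the size-ordered tree for size + alignment - chunksize bytes (and why the wrap-around check just below matters for huge alignments) is that any chunk-aligned extent of that length must contain an alignment-aligned window of the requested size. The check below verifies that claim exhaustively for one set of invented sizes; align_up() is a local helper, not the library's macro.

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static uintptr_t
align_up(uintptr_t p, uintptr_t a)       // a must be a power of two
{
    return (p + (a - 1)) & ~(a - 1);
}

int
main(void)
{
    const uintptr_t chunksize = 1 << 22;             // 4 MiB chunks
    const uintptr_t size = 2 * chunksize;
    const uintptr_t alignment = 4 * chunksize;
    const uintptr_t alloc_size = size + alignment - chunksize;

    // Try every chunk-aligned placement the recycled extent could have.
    for (uintptr_t addr = chunksize; addr <= 64 * chunksize; addr += chunksize) {
        uintptr_t lead = align_up(addr, alignment) - addr;
        assert(lead <= alignment - chunksize);       // lead is bounded
        assert(lead + size <= alloc_size);           // aligned window fits
    }
    printf("asking for %ju chunks always leaves an aligned window\n",
        (uintmax_t)(alloc_size / chunksize));
    return 0;
}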
*/ + if (alloc_size < size) + return (NULL); + key.addr = NULL; + key.size = alloc_size; + malloc_mutex_lock(&chunks_mtx); + node = extent_tree_szad_nsearch(&chunks_szad, &key); + if (node == NULL) { + malloc_mutex_unlock(&chunks_mtx); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + /* Remove node from the tree. */ + extent_tree_szad_remove(&chunks_szad, node); + extent_tree_ad_remove(&chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(&chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop chunks_mtx in order to avoid + * deadlock, and if node allocation fails, deallocate + * the result before returning an error. + */ + malloc_mutex_unlock(&chunks_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc(ret, size, true); + return (NULL); + } + malloc_mutex_lock(&chunks_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + extent_tree_szad_insert(&chunks_szad, node); + extent_tree_ad_insert(&chunks_ad, node); + node = NULL; + } + malloc_mutex_unlock(&chunks_mtx); + + if (node != NULL) + base_node_dealloc(node); +#ifdef JEMALLOC_PURGE_MADVISE_FREE + if (*zero) { + VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + memset(ret, 0, size); + } +#endif + return (ret); +} + +/* + * If the caller specifies (*zero == false), it is still possible to receive + * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() + * takes advantage of this to avoid demanding zeroed chunks, but taking + * advantage of them if they are returned. + */ +void * +chunk_alloc(size_t size, size_t alignment, bool base, bool *zero) +{ + void *ret; + + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert((alignment & chunksize_mask) == 0); + + ret = chunk_recycle(size, alignment, zero); + if (ret != NULL) + goto label_return; + if (config_dss) { + ret = chunk_alloc_dss(size, alignment, zero); + if (ret != NULL) + goto label_return; + } + ret = chunk_alloc_mmap(size, alignment); + if (ret != NULL) { + *zero = true; + goto label_return; + } + + /* All strategies for allocation failed. 
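For callers, the important part of chunk_alloc() is the ordering of strategies and the in/out zero flag described in the comment above it: recycled address space may be dirty, while fresh mappings are known to be zeroed. The sketch below mirrors only that control flow; try_recycle(), try_dss(), and try_mmap() are trivial stand-ins, not the real allocation paths.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

// Stand-ins for chunk_recycle(), chunk_alloc_dss() and chunk_alloc_mmap().
static void *try_recycle(size_t sz, bool *zero) { (void)sz; (void)zero; return NULL; }
static void *try_dss(size_t sz, bool *zero) { (void)sz; (void)zero; return NULL; }
static void *try_mmap(size_t sz, bool *zero) { *zero = true; return calloc(1, sz); }

static void *
alloc_chunk(size_t size, bool *zero)
{
    void *ret;

    if ((ret = try_recycle(size, zero)) != NULL)    // reuse address space first
        return ret;
    if ((ret = try_dss(size, zero)) != NULL)        // then the DSS, if configured
        return ret;
    return try_mmap(size, zero);                    // finally a fresh mapping
}

int
main(void)
{
    bool zero = false;                  // caller does not demand zeroed memory
    void *p = alloc_chunk(1 << 20, &zero);

    printf("got %p, known zeroed: %s\n", p, zero ? "yes" : "no");
    free(p);
    return 0;
}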
*/ + ret = NULL; +label_return: + if (config_ivsalloc && base == false && ret != NULL) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size, true); + return (NULL); + } + } + if ((config_stats || config_prof) && ret != NULL) { + bool gdump; + malloc_mutex_lock(&chunks_mtx); + if (config_stats) + stats_chunks.nchunks += (size / chunksize); + stats_chunks.curchunks += (size / chunksize); + if (stats_chunks.curchunks > stats_chunks.highchunks) { + stats_chunks.highchunks = stats_chunks.curchunks; + if (config_prof) + gdump = true; + } else if (config_prof) + gdump = false; + malloc_mutex_unlock(&chunks_mtx); + if (config_prof && opt_prof && opt_prof_gdump && gdump) + prof_gdump(); + } + + assert(CHUNK_ADDR2BASE(ret) == ret); + return (ret); +} + +static void +chunk_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + madvise(chunk, size, JEMALLOC_MADV_PURGE); + + xnode = NULL; + malloc_mutex_lock(&chunks_mtx); + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within chunks_ad, + * so only remove/insert from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on chunks_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. + */ + malloc_mutex_unlock(&chunks_mtx); + xnode = base_node_alloc(); + if (xnode == NULL) + return; + malloc_mutex_lock(&chunks_mtx); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&chunks_ad, node); + extent_tree_szad_insert(&chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused due to a race. */ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within chunks_ad, so only + * remove/insert node from/into chunks_szad. + */ + extent_tree_szad_remove(&chunks_szad, prev); + extent_tree_ad_remove(&chunks_ad, prev); + + extent_tree_szad_remove(&chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&chunks_szad, node); + + base_node_dealloc(prev); + } + malloc_mutex_unlock(&chunks_mtx); +} + +void +chunk_dealloc(void *chunk, size_t size, bool unmap) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + + if (config_ivsalloc) + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); + if (config_stats || config_prof) { + malloc_mutex_lock(&chunks_mtx); + stats_chunks.curchunks -= (size / chunksize); + malloc_mutex_unlock(&chunks_mtx); + } + + if (unmap) { + if (chunk_dealloc_mmap(chunk, size) == false) + return; + chunk_record(chunk, size); + } +} + +bool +chunk_boot0(void) +{ + + /* Set variables according to the value of opt_lg_chunk. 
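Everything chunk_boot0() derives below follows mechanically from opt_lg_chunk. A worked example, using 22 (4 MiB chunks) and 4 KiB pages purely for illustration:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

int
main(void)
{
    const unsigned opt_lg_chunk = 22, lg_page = 12;
    const size_t chunksize = (size_t)1 << opt_lg_chunk;
    const size_t chunksize_mask = chunksize - 1;
    const size_t chunk_npages = chunksize >> lg_page;
    uintptr_t p = 0x80123456;            // some interior pointer

    printf("chunksize=%zu mask=%#zx npages=%zu\n",
        chunksize, chunksize_mask, chunk_npages);
    // CHUNK_ADDR2BASE-style rounding: clear the low lg_chunk bits.
    printf("chunk base of %#jx is %#jx\n",
        (uintmax_t)p, (uintmax_t)(p & ~(uintptr_t)chunksize_mask));
    return 0;
}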
*/ + chunksize = (ZU(1) << opt_lg_chunk); + assert(chunksize >= PAGE); + chunksize_mask = chunksize - 1; + chunk_npages = (chunksize >> LG_PAGE); + + if (config_stats || config_prof) { + if (malloc_mutex_init(&chunks_mtx)) + return (true); + memset(&stats_chunks, 0, sizeof(chunk_stats_t)); + } + if (config_dss && chunk_dss_boot()) + return (true); + extent_tree_szad_new(&chunks_szad); + extent_tree_ad_new(&chunks_ad); + if (config_ivsalloc) { + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk); + if (chunks_rtree == NULL) + return (true); + } + + return (false); +} + +bool +chunk_boot1(void) +{ + + if (chunk_mmap_boot()) + return (true); + + return (false); +} diff --git a/contrib/jemalloc/src/chunk_dss.c b/contrib/jemalloc/src/chunk_dss.c new file mode 100644 index 000000000000..b05509a55804 --- /dev/null +++ b/contrib/jemalloc/src/chunk_dss.c @@ -0,0 +1,159 @@ +#define JEMALLOC_CHUNK_DSS_C_ +#include "jemalloc/internal/jemalloc_internal.h" +/******************************************************************************/ +/* Data. */ + +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. + */ +static malloc_mutex_t dss_mtx; + +/* Base address of the DSS. */ +static void *dss_base; +/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ +static void *dss_prev; +/* Current upper limit on DSS addresses. */ +static void *dss_max; + +/******************************************************************************/ + +#ifndef JEMALLOC_HAVE_SBRK +static void * +sbrk(intptr_t increment) +{ + + not_implemented(); + + return (NULL); +} +#endif + +void * +chunk_alloc_dss(size_t size, size_t alignment, bool *zero) +{ + void *ret; + + cassert(config_dss); + assert(size > 0 && (size & chunksize_mask) == 0); + assert(alignment > 0 && (alignment & chunksize_mask) == 0); + + /* + * sbrk() uses a signed increment argument, so take care not to + * interpret a huge allocation request as a negative increment. + */ + if ((intptr_t)size < 0) + return (NULL); + + malloc_mutex_lock(&dss_mtx); + if (dss_prev != (void *)-1) { + size_t gap_size, cpad_size; + void *cpad, *dss_next; + intptr_t incr; + + /* + * The loop is necessary to recover from races with other + * threads that are using the DSS for something other than + * malloc. + */ + do { + /* Get the current end of the DSS. */ + dss_max = sbrk(0); + /* + * Calculate how much padding is necessary to + * chunk-align the end of the DSS. + */ + gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & + chunksize_mask; + /* + * Compute how much chunk-aligned pad space (if any) is + * necessary to satisfy alignment. This space can be + * recycled for later use. + */ + cpad = (void *)((uintptr_t)dss_max + gap_size); + ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, + alignment); + cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + dss_next = (void *)((uintptr_t)ret + size); + if ((uintptr_t)ret < (uintptr_t)dss_max || + (uintptr_t)dss_next < (uintptr_t)dss_max) { + /* Wrap-around. */ + malloc_mutex_unlock(&dss_mtx); + return (NULL); + } + incr = gap_size + cpad_size + size; + dss_prev = sbrk(incr); + if (dss_prev == dss_max) { + /* Success. 
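The pointer arithmetic in the sbrk() path above is worth tracing once: the break is first padded up to a chunk boundary (gap_size), then up to the requested alignment (cpad_size, which becomes a recyclable chunk), and the increment is chosen so the new break lands exactly at the end of the returned region. The sketch below replays that arithmetic on a made-up break address, with no real sbrk() involved.

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

int
main(void)
{
    const uintptr_t chunksize = 1 << 22;                 // 4 MiB
    const uintptr_t size = 2 * chunksize, alignment = 4 * chunksize;
    const uintptr_t dss_max = 0x00a12345;                // unaligned break (made up)

    uintptr_t gap_size = (chunksize - (dss_max & (chunksize - 1))) &
        (chunksize - 1);
    uintptr_t cpad = dss_max + gap_size;                 // first chunk boundary
    uintptr_t ret = (dss_max + (alignment - 1)) & ~(alignment - 1);
    uintptr_t cpad_size = ret - cpad;                    // recyclable pad chunks
    uintptr_t dss_next = ret + size;
    uintptr_t incr = gap_size + cpad_size + size;

    assert((ret & (alignment - 1)) == 0);                // result is aligned
    assert(dss_max + incr == dss_next);                  // break ends at dss_next
    printf("gap=%#jx cpad=%#jx incr=%#jx\n",
        (uintmax_t)gap_size, (uintmax_t)cpad_size, (uintmax_t)incr);
    return 0;
}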
*/ + dss_max = dss_next; + malloc_mutex_unlock(&dss_mtx); + if (cpad_size != 0) + chunk_dealloc(cpad, cpad_size, true); + *zero = true; + return (ret); + } + } while (dss_prev != (void *)-1); + } + malloc_mutex_unlock(&dss_mtx); + + return (NULL); +} + +bool +chunk_in_dss(void *chunk) +{ + bool ret; + + cassert(config_dss); + + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&dss_mtx); + + return (ret); +} + +bool +chunk_dss_boot(void) +{ + + cassert(config_dss); + + if (malloc_mutex_init(&dss_mtx)) + return (true); + dss_base = sbrk(0); + dss_prev = dss_base; + dss_max = dss_base; + + return (false); +} + +void +chunk_dss_prefork(void) +{ + + if (config_dss) + malloc_mutex_prefork(&dss_mtx); +} + +void +chunk_dss_postfork_parent(void) +{ + + if (config_dss) + malloc_mutex_postfork_parent(&dss_mtx); +} + +void +chunk_dss_postfork_child(void) +{ + + if (config_dss) + malloc_mutex_postfork_child(&dss_mtx); +} + +/******************************************************************************/ diff --git a/contrib/jemalloc/src/chunk_mmap.c b/contrib/jemalloc/src/chunk_mmap.c new file mode 100644 index 000000000000..e11cc0e60398 --- /dev/null +++ b/contrib/jemalloc/src/chunk_mmap.c @@ -0,0 +1,207 @@ +#define JEMALLOC_CHUNK_MMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +/* + * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and + * potentially avoid some system calls. + */ +malloc_tsd_data(static, mmap_unaligned, bool, false) +malloc_tsd_funcs(JEMALLOC_INLINE, mmap_unaligned, bool, false, + malloc_tsd_no_cleanup) + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *pages_map(void *addr, size_t size); +static void pages_unmap(void *addr, size_t size); +static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, + bool unaligned); + +/******************************************************************************/ + +static void * +pages_map(void *addr, size_t size) +{ + void *ret; + + /* + * We don't use MAP_FIXED here, because it can cause the *replacement* + * of existing mappings, and we only want to create new mappings. + */ + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); + assert(ret != NULL); + + if (ret == MAP_FAILED) + ret = NULL; + else if (addr != NULL && ret != addr) { + /* + * We succeeded in mapping memory, but not in the right place. + */ + if (munmap(ret, size) == -1) { + char buf[BUFERROR_BUF]; + + buferror(errno, buf, sizeof(buf)); + malloc_printf(": Error in munmap(): %s\n", buf); + if (opt_abort) + abort(); + } +} + +static void * +chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) +{ + void *ret, *pages; + size_t alloc_size, leadsize, trailsize; + + alloc_size = size + alignment - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); + pages = pages_map(NULL, alloc_size); + if (pages == NULL) + return (NULL); + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)pages + leadsize); + if (leadsize != 0) { + /* Note that mmap() returned an unaligned mapping. 
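chunk_alloc_mmap_slow() is the reliable fallback: over-map by alignment minus one page, then unmap the unaligned head and the unused tail. The program below performs the same trim with real mmap() and munmap() calls, using small page-based sizes and minimal error handling; it demonstrates the technique, it is not the function itself.

#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
    size_t page = (size_t)sysconf(_SC_PAGESIZE);
    size_t size = 64 * page, alignment = 256 * page;
    size_t alloc_size = size + alignment - page;         // worst-case over-map

    char *pages = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
        MAP_PRIVATE | MAP_ANON, -1, 0);
    if (pages == MAP_FAILED)
        return 1;
    size_t lead = (alignment - ((uintptr_t)pages & (alignment - 1))) &
        (alignment - 1);
    char *ret = pages + lead;
    size_t trail = alloc_size - lead - size;

    if (lead != 0)
        munmap(pages, lead);                             // drop unaligned head
    if (trail != 0)
        munmap(ret + size, trail);                       // drop unused tail
    printf("aligned region at %p\n", (void *)ret);
    munmap(ret, size);
    return 0;
}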
*/ + unaligned = true; + pages_unmap(pages, leadsize); + } + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); + + /* + * If mmap() returned an aligned mapping, reset mmap_unaligned so that + * the next chunk_alloc_mmap() execution tries the fast allocation + * method. + */ + if (unaligned == false && mmap_unaligned_booted) { + bool mu = false; + mmap_unaligned_tsd_set(&mu); + } + + return (ret); +} + +void * +chunk_alloc_mmap(size_t size, size_t alignment) +{ + void *ret; + + /* + * Ideally, there would be a way to specify alignment to mmap() (like + * NetBSD has), but in the absence of such a feature, we have to work + * hard to efficiently create aligned mappings. The reliable, but + * slow method is to create a mapping that is over-sized, then trim the + * excess. However, that always results in at least one call to + * pages_unmap(). + * + * A more optimistic approach is to try mapping precisely the right + * amount, then try to append another mapping if alignment is off. In + * practice, this works out well as long as the application is not + * interleaving mappings via direct mmap() calls. If we do run into a + * situation where there is an interleaved mapping and we are unable to + * extend an unaligned mapping, our best option is to switch to the + * slow method until mmap() returns another aligned mapping. This will + * tend to leave a gap in the memory map that is too small to cause + * later problems for the optimistic method. + * + * Another possible confounding factor is address space layout + * randomization (ASLR), which causes mmap(2) to disregard the + * requested address. mmap_unaligned tracks whether the previous + * chunk_alloc_mmap() execution received any unaligned or relocated + * mappings, and if so, the current execution will immediately fall + * back to the slow method. However, we keep track of whether the fast + * method would have succeeded, and if so, we make a note to try the + * fast method next time. + */ + + if (mmap_unaligned_booted && *mmap_unaligned_tsd_get() == false) { + size_t offset; + + ret = pages_map(NULL, size); + if (ret == NULL) + return (NULL); + + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); + if (offset != 0) { + bool mu = true; + mmap_unaligned_tsd_set(&mu); + /* Try to extend chunk boundary. */ + if (pages_map((void *)((uintptr_t)ret + size), + chunksize - offset) == NULL) { + /* + * Extension failed. Clean up, then revert to + * the reliable-but-expensive method. + */ + pages_unmap(ret, size); + ret = chunk_alloc_mmap_slow(size, alignment, + true); + } else { + /* Clean up unneeded leading space. */ + pages_unmap(ret, chunksize - offset); + ret = (void *)((uintptr_t)ret + (chunksize - + offset)); + } + } + } else + ret = chunk_alloc_mmap_slow(size, alignment, false); + + return (ret); +} + +bool +chunk_dealloc_mmap(void *chunk, size_t size) +{ + + if (config_munmap) + pages_unmap(chunk, size); + + return (config_munmap == false); +} + +bool +chunk_mmap_boot(void) +{ + + /* + * XXX For the non-TLS implementation of tsd, the first access from + * each thread causes memory allocation. The result is a bootstrapping + * problem for this particular use case, so for now just disable it by + * leaving it in an unbooted state. 
+ */ +#ifdef JEMALLOC_TLS + if (mmap_unaligned_tsd_boot()) + return (true); +#endif + + return (false); +} diff --git a/contrib/jemalloc/src/ckh.c b/contrib/jemalloc/src/ckh.c new file mode 100644 index 000000000000..742a950bea20 --- /dev/null +++ b/contrib/jemalloc/src/ckh.c @@ -0,0 +1,609 @@ +/* + ******************************************************************************* + * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each + * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash + * functions are employed. The original cuckoo hashing algorithm was described + * in: + * + * Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms + * 51(2):122-144. + * + * Generalization of cuckoo hashing was discussed in: + * + * Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical + * alternative to traditional hash tables. In Proceedings of the 7th + * Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA, + * January 2006. + * + * This implementation uses precisely two hash functions because that is the + * fewest that can work, and supporting multiple hashes is an implementation + * burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006) + * that shows approximate expected maximum load factors for various + * configurations: + * + * | #cells/bucket | + * #hashes | 1 | 2 | 4 | 8 | + * --------+-------+-------+-------+-------+ + * 1 | 0.006 | 0.006 | 0.03 | 0.12 | + * 2 | 0.49 | 0.86 |>0.93< |>0.96< | + * 3 | 0.91 | 0.97 | 0.98 | 0.999 | + * 4 | 0.97 | 0.99 | 0.999 | | + * + * The number of cells per bucket is chosen such that a bucket fits in one cache + * line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing, + * respectively. + * + ******************************************************************************/ +#define JEMALLOC_CKH_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static bool ckh_grow(ckh_t *ckh); +static void ckh_shrink(ckh_t *ckh); + +/******************************************************************************/ + +/* + * Search bucket for key and return the cell number if found; SIZE_T_MAX + * otherwise. + */ +JEMALLOC_INLINE size_t +ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) +{ + ckhc_t *cell; + unsigned i; + + for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; + if (cell->key != NULL && ckh->keycomp(key, cell->key)) + return ((bucket << LG_CKH_BUCKET_CELLS) + i); + } + + return (SIZE_T_MAX); +} + +/* + * Search table for key and return cell number if found; SIZE_T_MAX otherwise. + */ +JEMALLOC_INLINE size_t +ckh_isearch(ckh_t *ckh, const void *key) +{ + size_t hash1, hash2, bucket, cell; + + assert(ckh != NULL); + + ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + + /* Search primary bucket. */ + bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + cell = ckh_bucket_search(ckh, bucket, key); + if (cell != SIZE_T_MAX) + return (cell); + + /* Search secondary bucket. 
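The lookup path is the easy half of cuckoo hashing: a key can only ever live in the bucket named by its first hash or the bucket named by its second, so a search costs at most two small scans. The toy program below shows that shape with deliberately tiny buckets and throwaway hash functions; none of the names correspond to the real implementation.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

#define LG_BUCKETS 3
#define CELLS      4                      // cells per bucket, one cache line's worth

static const char *tab[(1 << LG_BUCKETS) * CELLS];   // NULL means an empty cell

static size_t h1(const char *k) { return (unsigned char)k[1] * 2654435761u; }
static size_t h2(const char *k) { return (unsigned char)k[0] * 40503u; }

static int
bucket_search(size_t bucket, const char *key)
{
    for (unsigned i = 0; i < CELLS; i++) {
        const char *c = tab[bucket * CELLS + i];
        if (c != NULL && strcmp(c, key) == 0)
            return (int)(bucket * CELLS + i);
    }
    return -1;                            // not in this bucket
}

int
main(void)
{
    const char *key = "chunk";
    size_t mask = (1 << LG_BUCKETS) - 1;

    tab[(h2(key) & mask) * CELLS + 1] = key;   // pretend an insert placed it here
    int cell = bucket_search(h1(key) & mask, key);     // primary bucket
    if (cell == -1)
        cell = bucket_search(h2(key) & mask, key);     // secondary bucket
    printf("found in cell %d\n", cell);
    return 0;
}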
*/ + bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + cell = ckh_bucket_search(ckh, bucket, key); + return (cell); +} + +JEMALLOC_INLINE bool +ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, + const void *data) +{ + ckhc_t *cell; + unsigned offset, i; + + /* + * Cycle through the cells in the bucket, starting at a random position. + * The randomness avoids worst-case search overhead as buckets fill up. + */ + prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; + if (cell->key == NULL) { + cell->key = key; + cell->data = data; + ckh->count++; + return (false); + } + } + + return (true); +} + +/* + * No space is available in bucket. Randomly evict an item, then try to find an + * alternate location for that item. Iteratively repeat this + * eviction/relocation procedure until either success or detection of an + * eviction/relocation bucket cycle. + */ +JEMALLOC_INLINE bool +ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, + void const **argdata) +{ + const void *key, *data, *tkey, *tdata; + ckhc_t *cell; + size_t hash1, hash2, bucket, tbucket; + unsigned i; + + bucket = argbucket; + key = *argkey; + data = *argdata; + while (true) { + /* + * Choose a random item within the bucket to evict. This is + * critical to correct function, because without (eventually) + * evicting all items within a bucket during iteration, it + * would be possible to get stuck in an infinite loop if there + * were an item for which both hashes indicated the same + * bucket. + */ + prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; + assert(cell->key != NULL); + + /* Swap cell->{key,data} and {key,data} (evict). */ + tkey = cell->key; tdata = cell->data; + cell->key = key; cell->data = data; + key = tkey; data = tdata; + +#ifdef CKH_COUNT + ckh->nrelocs++; +#endif + + /* Find the alternate bucket for the evicted item. */ + ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (tbucket == bucket) { + tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + /* + * It may be that (tbucket == bucket) still, if the + * item's hashes both indicate this bucket. However, + * we are guaranteed to eventually escape this bucket + * during iteration, assuming pseudo-random item + * selection (true randomness would make infinite + * looping a remote possibility). The reason we can + * never get trapped forever is that there are two + * cases: + * + * 1) This bucket == argbucket, so we will quickly + * detect an eviction cycle and terminate. + * 2) An item was evicted to this bucket from another, + * which means that at least one item in this bucket + * has hashes that indicate distinct buckets. + */ + } + /* Check for a cycle. */ + if (tbucket == argbucket) { + *argkey = key; + *argdata = data; + return (true); + } + + bucket = tbucket; + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + } +} + +JEMALLOC_INLINE bool +ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) +{ + size_t hash1, hash2, bucket; + const void *key = *argkey; + const void *data = *argdata; + + ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + + /* Try to insert in primary bucket. 
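One detail of ckh_try_bucket_insert() that is easy to miss is the pseudo-random starting offset: new items do not always land in cell 0 of a bucket, which keeps the eventual eviction choice spread across cells. A minimal sketch of that probe, with a throwaway generator standing in for the prng32() macro:

#include <stdio.h>
#include <stddef.h>

#define CELLS 4

static unsigned rng_state = 42;
static unsigned
cheap_prng(void)                          // stand-in for the real prng32()
{
    rng_state = rng_state * 1103515245u + 12345u;
    return rng_state >> 16;
}

static int
bucket_insert(const char **bucket, const char *key)
{
    unsigned offset = cheap_prng() & (CELLS - 1);

    for (unsigned i = 0; i < CELLS; i++) {
        unsigned cell = (i + offset) & (CELLS - 1);   // wrap within the bucket
        if (bucket[cell] == NULL) {
            bucket[cell] = key;
            return 0;                                  // inserted
        }
    }
    return 1;                             // bucket full: the caller must evict
}

int
main(void)
{
    const char *bucket[CELLS] = { 0 };

    for (int i = 0; i < CELLS + 1; i++)
        printf("insert %d: %s\n", i, bucket_insert(bucket, "x") ? "full" : "ok");
    return 0;
}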
*/ + bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + + /* Try to insert in secondary bucket. */ + bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + + /* + * Try to find a place for this item via iterative eviction/relocation. + */ + return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata)); +} + +/* + * Try to rebuild the hash table from scratch by inserting all items from the + * old table into the new. + */ +JEMALLOC_INLINE bool +ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) +{ + size_t count, i, nins; + const void *key, *data; + + count = ckh->count; + ckh->count = 0; + for (i = nins = 0; nins < count; i++) { + if (aTab[i].key != NULL) { + key = aTab[i].key; + data = aTab[i].data; + if (ckh_try_insert(ckh, &key, &data)) { + ckh->count = count; + return (true); + } + nins++; + } + } + + return (false); +} + +static bool +ckh_grow(ckh_t *ckh) +{ + bool ret; + ckhc_t *tab, *ttab; + size_t lg_curcells; + unsigned lg_prevbuckets; + +#ifdef CKH_COUNT + ckh->ngrows++; +#endif + + /* + * It is possible (though unlikely, given well behaved hashes) that the + * table will have to be doubled more than once in order to create a + * usable table. + */ + lg_prevbuckets = ckh->lg_curbuckets; + lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; + while (true) { + size_t usize; + + lg_curcells++; + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); + if (usize == 0) { + ret = true; + goto label_return; + } + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (tab == NULL) { + ret = true; + goto label_return; + } + /* Swap in new table. */ + ttab = ckh->tab; + ckh->tab = tab; + tab = ttab; + ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; + + if (ckh_rebuild(ckh, tab) == false) { + idalloc(tab); + break; + } + + /* Rebuilding failed, so back out partially rebuilt table. */ + idalloc(ckh->tab); + ckh->tab = tab; + ckh->lg_curbuckets = lg_prevbuckets; + } + + ret = false; +label_return: + return (ret); +} + +static void +ckh_shrink(ckh_t *ckh) +{ + ckhc_t *tab, *ttab; + size_t lg_curcells, usize; + unsigned lg_prevbuckets; + + /* + * It is possible (though unlikely, given well behaved hashes) that the + * table rebuild will fail. + */ + lg_prevbuckets = ckh->lg_curbuckets; + lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); + if (usize == 0) + return; + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (tab == NULL) { + /* + * An OOM error isn't worth propagating, since it doesn't + * prevent this or future operations from proceeding. + */ + return; + } + /* Swap in new table. */ + ttab = ckh->tab; + ckh->tab = tab; + tab = ttab; + ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; + + if (ckh_rebuild(ckh, tab) == false) { + idalloc(tab); +#ifdef CKH_COUNT + ckh->nshrinks++; +#endif + return; + } + + /* Rebuilding failed, so back out partially rebuilt table. 
*/ + idalloc(ckh->tab); + ckh->tab = tab; + ckh->lg_curbuckets = lg_prevbuckets; +#ifdef CKH_COUNT + ckh->nshrinkfails++; +#endif +} + +bool +ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) +{ + bool ret; + size_t mincells, usize; + unsigned lg_mincells; + + assert(minitems > 0); + assert(hash != NULL); + assert(keycomp != NULL); + +#ifdef CKH_COUNT + ckh->ngrows = 0; + ckh->nshrinks = 0; + ckh->nshrinkfails = 0; + ckh->ninserts = 0; + ckh->nrelocs = 0; +#endif + ckh->prng_state = 42; /* Value doesn't really matter. */ + ckh->count = 0; + + /* + * Find the minimum power of 2 that is large enough to fit aBaseCount + * entries. We are using (2+,2) cuckoo hashing, which has an expected + * maximum load factor of at least ~0.86, so 0.75 is a conservative load + * factor that will typically allow 2^aLgMinItems to fit without ever + * growing the table. + */ + assert(LG_CKH_BUCKET_CELLS > 0); + mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; + for (lg_mincells = LG_CKH_BUCKET_CELLS; + (ZU(1) << lg_mincells) < mincells; + lg_mincells++) + ; /* Do nothing. */ + ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; + ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; + ckh->hash = hash; + ckh->keycomp = keycomp; + + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); + if (usize == 0) { + ret = true; + goto label_return; + } + ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (ckh->tab == NULL) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + return (ret); +} + +void +ckh_delete(ckh_t *ckh) +{ + + assert(ckh != NULL); + +#ifdef CKH_VERBOSE + malloc_printf( + "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64"," + " nshrinkfails: %"PRIu64", ninserts: %"PRIu64"," + " nrelocs: %"PRIu64"\n", __func__, ckh, + (unsigned long long)ckh->ngrows, + (unsigned long long)ckh->nshrinks, + (unsigned long long)ckh->nshrinkfails, + (unsigned long long)ckh->ninserts, + (unsigned long long)ckh->nrelocs); +#endif + + idalloc(ckh->tab); +#ifdef JEMALLOC_DEBUG + memset(ckh, 0x5a, sizeof(ckh_t)); +#endif +} + +size_t +ckh_count(ckh_t *ckh) +{ + + assert(ckh != NULL); + + return (ckh->count); +} + +bool +ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) +{ + size_t i, ncells; + + for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + + LG_CKH_BUCKET_CELLS)); i < ncells; i++) { + if (ckh->tab[i].key != NULL) { + if (key != NULL) + *key = (void *)ckh->tab[i].key; + if (data != NULL) + *data = (void *)ckh->tab[i].data; + *tabind = i + 1; + return (false); + } + } + + return (true); +} + +bool +ckh_insert(ckh_t *ckh, const void *key, const void *data) +{ + bool ret; + + assert(ckh != NULL); + assert(ckh_search(ckh, key, NULL, NULL)); + +#ifdef CKH_COUNT + ckh->ninserts++; +#endif + + while (ckh_try_insert(ckh, &key, &data)) { + if (ckh_grow(ckh)) { + ret = true; + goto label_return; + } + } + + ret = false; +label_return: + return (ret); +} + +bool +ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) +{ + size_t cell; + + assert(ckh != NULL); + + cell = ckh_isearch(ckh, searchkey); + if (cell != SIZE_T_MAX) { + if (key != NULL) + *key = (void *)ckh->tab[cell].key; + if (data != NULL) + *data = (void *)ckh->tab[cell].data; + ckh->tab[cell].key = NULL; + ckh->tab[cell].data = NULL; /* Not necessary. */ + + ckh->count--; + /* Try to halve the table if it is less than 1/4 full. 
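The sizing and resize thresholds scattered through ckh_new() and ckh_remove() are easier to see as numbers. The sketch below reproduces the initial-size arithmetic (targeting roughly a 0.75 load factor) and the quarter-full shrink trigger for the case of four cells per bucket; the values are illustrative only.

#include <stdio.h>

int
main(void)
{
    unsigned long minitems = 100;
    // pad the item count past a multiple of 3, then take 4/3 of it (~75% load)
    unsigned long mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
    unsigned lg_cells = 2;                        // log2 of cells per bucket
    while ((1UL << lg_cells) < mincells)
        lg_cells++;
    unsigned long cells = 1UL << lg_cells;
    unsigned long shrink_below = cells >> 2;      // "less than 1/4 full"

    printf("minitems=%lu -> mincells=%lu -> table of %lu cells\n",
        minitems, mincells, cells);
    printf("halve the table only when the count drops below %lu\n",
        shrink_below);
    return 0;
}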
*/ + if (ckh->count < (ZU(1) << (ckh->lg_curbuckets + + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets + > ckh->lg_minbuckets) { + /* Ignore error due to OOM. */ + ckh_shrink(ckh); + } + + return (false); + } + + return (true); +} + +bool +ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) +{ + size_t cell; + + assert(ckh != NULL); + + cell = ckh_isearch(ckh, searchkey); + if (cell != SIZE_T_MAX) { + if (key != NULL) + *key = (void *)ckh->tab[cell].key; + if (data != NULL) + *data = (void *)ckh->tab[cell].data; + return (false); + } + + return (true); +} + +void +ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) +{ + size_t ret1, ret2; + uint64_t h; + + assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); + assert(hash1 != NULL); + assert(hash2 != NULL); + + h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea)); + if (minbits <= 32) { + /* + * Avoid doing multiple hashes, since a single hash provides + * enough bits. + */ + ret1 = h & ZU(0xffffffffU); + ret2 = h >> 32; + } else { + ret1 = h; + ret2 = hash(key, strlen((const char *)key), + UINT64_C(0x8432a476666bbc13)); + } + + *hash1 = ret1; + *hash2 = ret2; +} + +bool +ckh_string_keycomp(const void *k1, const void *k2) +{ + + assert(k1 != NULL); + assert(k2 != NULL); + + return (strcmp((char *)k1, (char *)k2) ? false : true); +} + +void +ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2) +{ + size_t ret1, ret2; + uint64_t h; + union { + const void *v; + uint64_t i; + } u; + + assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); + assert(hash1 != NULL); + assert(hash2 != NULL); + + assert(sizeof(u.v) == sizeof(u.i)); +#if (LG_SIZEOF_PTR != LG_SIZEOF_INT) + u.i = 0; +#endif + u.v = key; + h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082)); + if (minbits <= 32) { + /* + * Avoid doing multiple hashes, since a single hash provides + * enough bits. + */ + ret1 = h & ZU(0xffffffffU); + ret2 = h >> 32; + } else { + assert(SIZEOF_PTR == 8); + ret1 = h; + ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d)); + } + + *hash1 = ret1; + *hash2 = ret2; +} + +bool +ckh_pointer_keycomp(const void *k1, const void *k2) +{ + + return ((k1 == k2) ? true : false); +} diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c new file mode 100644 index 000000000000..a6a02cc5db08 --- /dev/null +++ b/contrib/jemalloc/src/ctl.c @@ -0,0 +1,1385 @@ +#define JEMALLOC_CTL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +/* + * ctl_mtx protects the following: + * - ctl_stats.* + * - opt_prof_active + */ +static malloc_mutex_t ctl_mtx; +static bool ctl_initialized; +static uint64_t ctl_epoch; +static ctl_stats_t ctl_stats; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
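Both hash adaptors here use the same shortcut: when each of the two cuckoo hashes needs at most 32 bits, a single 64-bit hash is computed and split into its low and high halves rather than hashing the key twice. Reduced to its essence (the constant is just an arbitrary stand-in for a computed hash value):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

int
main(void)
{
    uint64_t h = 0x243f6a8885a308d3ULL;           // pretend this is hash(key)
    size_t hash1 = (size_t)(h & 0xffffffffUL);    // low 32 bits
    size_t hash2 = (size_t)(h >> 32);             // high 32 bits

    printf("hash1=%#zx hash2=%#zx\n", hash1, hash2);
    return 0;
}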
*/ + +#define CTL_PROTO(n) \ +static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen); + +#define INDEX_PROTO(n) \ +const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ + size_t i); + +static bool ctl_arena_init(ctl_arena_stats_t *astats); +static void ctl_arena_clear(ctl_arena_stats_t *astats); +static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, + arena_t *arena); +static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, + ctl_arena_stats_t *astats); +static void ctl_arena_refresh(arena_t *arena, unsigned i); +static void ctl_refresh(void); +static bool ctl_init(void); +static int ctl_lookup(const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp); + +CTL_PROTO(version) +CTL_PROTO(epoch) +CTL_PROTO(thread_tcache_enabled) +CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_arena) +CTL_PROTO(thread_allocated) +CTL_PROTO(thread_allocatedp) +CTL_PROTO(thread_deallocated) +CTL_PROTO(thread_deallocatedp) +CTL_PROTO(config_debug) +CTL_PROTO(config_dss) +CTL_PROTO(config_fill) +CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_munmap) +CTL_PROTO(config_prof) +CTL_PROTO(config_prof_libgcc) +CTL_PROTO(config_prof_libunwind) +CTL_PROTO(config_stats) +CTL_PROTO(config_tcache) +CTL_PROTO(config_tls) +CTL_PROTO(config_utrace) +CTL_PROTO(config_valgrind) +CTL_PROTO(config_xmalloc) +CTL_PROTO(opt_abort) +CTL_PROTO(opt_lg_chunk) +CTL_PROTO(opt_narenas) +CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_stats_print) +CTL_PROTO(opt_junk) +CTL_PROTO(opt_zero) +CTL_PROTO(opt_quarantine) +CTL_PROTO(opt_redzone) +CTL_PROTO(opt_utrace) +CTL_PROTO(opt_valgrind) +CTL_PROTO(opt_xmalloc) +CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_prof) +CTL_PROTO(opt_prof_prefix) +CTL_PROTO(opt_prof_active) +CTL_PROTO(opt_lg_prof_sample) +CTL_PROTO(opt_lg_prof_interval) +CTL_PROTO(opt_prof_gdump) +CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_accum) +CTL_PROTO(arenas_bin_i_size) +CTL_PROTO(arenas_bin_i_nregs) +CTL_PROTO(arenas_bin_i_run_size) +INDEX_PROTO(arenas_bin_i) +CTL_PROTO(arenas_lrun_i_size) +INDEX_PROTO(arenas_lrun_i) +CTL_PROTO(arenas_narenas) +CTL_PROTO(arenas_initialized) +CTL_PROTO(arenas_quantum) +CTL_PROTO(arenas_page) +CTL_PROTO(arenas_tcache_max) +CTL_PROTO(arenas_nbins) +CTL_PROTO(arenas_nhbins) +CTL_PROTO(arenas_nlruns) +CTL_PROTO(arenas_purge) +CTL_PROTO(prof_active) +CTL_PROTO(prof_dump) +CTL_PROTO(prof_interval) +CTL_PROTO(stats_chunks_current) +CTL_PROTO(stats_chunks_total) +CTL_PROTO(stats_chunks_high) +CTL_PROTO(stats_huge_allocated) +CTL_PROTO(stats_huge_nmalloc) +CTL_PROTO(stats_huge_ndalloc) +CTL_PROTO(stats_arenas_i_small_allocated) +CTL_PROTO(stats_arenas_i_small_nmalloc) +CTL_PROTO(stats_arenas_i_small_ndalloc) +CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_large_allocated) +CTL_PROTO(stats_arenas_i_large_nmalloc) +CTL_PROTO(stats_arenas_i_large_ndalloc) +CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_allocated) +CTL_PROTO(stats_arenas_i_bins_j_nmalloc) +CTL_PROTO(stats_arenas_i_bins_j_ndalloc) +CTL_PROTO(stats_arenas_i_bins_j_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_nfills) +CTL_PROTO(stats_arenas_i_bins_j_nflushes) +CTL_PROTO(stats_arenas_i_bins_j_nruns) +CTL_PROTO(stats_arenas_i_bins_j_nreruns) +CTL_PROTO(stats_arenas_i_bins_j_curruns) +INDEX_PROTO(stats_arenas_i_bins_j) +CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) +CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) +CTL_PROTO(stats_arenas_i_lruns_j_nrequests) 
+CTL_PROTO(stats_arenas_i_lruns_j_curruns) +INDEX_PROTO(stats_arenas_i_lruns_j) +CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_pactive) +CTL_PROTO(stats_arenas_i_pdirty) +CTL_PROTO(stats_arenas_i_mapped) +CTL_PROTO(stats_arenas_i_npurge) +CTL_PROTO(stats_arenas_i_nmadvise) +CTL_PROTO(stats_arenas_i_purged) +INDEX_PROTO(stats_arenas_i) +CTL_PROTO(stats_cactive) +CTL_PROTO(stats_allocated) +CTL_PROTO(stats_active) +CTL_PROTO(stats_mapped) + +/******************************************************************************/ +/* mallctl tree. */ + +/* Maximum tree depth. */ +#define CTL_MAX_DEPTH 6 + +#define NAME(n) true, {.named = {n +#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL +#define CTL(c) 0, NULL}}, c##_ctl + +/* + * Only handles internal indexed nodes, since there are currently no external + * ones. + */ +#define INDEX(i) false, {.indexed = {i##_index}}, NULL + +static const ctl_node_t tcache_node[] = { + {NAME("enabled"), CTL(thread_tcache_enabled)}, + {NAME("flush"), CTL(thread_tcache_flush)} +}; + +static const ctl_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)}, + {NAME("allocated"), CTL(thread_allocated)}, + {NAME("allocatedp"), CTL(thread_allocatedp)}, + {NAME("deallocated"), CTL(thread_deallocated)}, + {NAME("deallocatedp"), CTL(thread_deallocatedp)}, + {NAME("tcache"), CHILD(tcache)} +}; + +static const ctl_node_t config_node[] = { + {NAME("debug"), CTL(config_debug)}, + {NAME("dss"), CTL(config_dss)}, + {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("munmap"), CTL(config_munmap)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("stats"), CTL(config_stats)}, + {NAME("tcache"), CTL(config_tcache)}, + {NAME("tls"), CTL(config_tls)}, + {NAME("utrace"), CTL(config_utrace)}, + {NAME("valgrind"), CTL(config_valgrind)}, + {NAME("xmalloc"), CTL(config_xmalloc)} +}; + +static const ctl_node_t opt_node[] = { + {NAME("abort"), CTL(opt_abort)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)}, + {NAME("quarantine"), CTL(opt_quarantine)}, + {NAME("redzone"), CTL(opt_redzone)}, + {NAME("utrace"), CTL(opt_utrace)}, + {NAME("valgrind"), CTL(opt_valgrind)}, + {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, + {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)} +}; + +static const ctl_node_t arenas_bin_i_node[] = { + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("run_size"), CTL(arenas_bin_i_run_size)} +}; +static const ctl_node_t super_arenas_bin_i_node[] = { + {NAME(""), CHILD(arenas_bin_i)} +}; + +static const ctl_node_t arenas_bin_node[] = { + {INDEX(arenas_bin_i)} +}; + +static const ctl_node_t arenas_lrun_i_node[] = { + {NAME("size"), CTL(arenas_lrun_i_size)} +}; +static const ctl_node_t super_arenas_lrun_i_node[] = { + {NAME(""), CHILD(arenas_lrun_i)} +}; + +static const 
ctl_node_t arenas_lrun_node[] = { + {INDEX(arenas_lrun_i)} +}; + +static const ctl_node_t arenas_node[] = { + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("initialized"), CTL(arenas_initialized)}, + {NAME("quantum"), CTL(arenas_quantum)}, + {NAME("page"), CTL(arenas_page)}, + {NAME("tcache_max"), CTL(arenas_tcache_max)}, + {NAME("nbins"), CTL(arenas_nbins)}, + {NAME("nhbins"), CTL(arenas_nhbins)}, + {NAME("bin"), CHILD(arenas_bin)}, + {NAME("nlruns"), CTL(arenas_nlruns)}, + {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("purge"), CTL(arenas_purge)} +}; + +static const ctl_node_t prof_node[] = { + {NAME("active"), CTL(prof_active)}, + {NAME("dump"), CTL(prof_dump)}, + {NAME("interval"), CTL(prof_interval)} +}; + +static const ctl_node_t stats_chunks_node[] = { + {NAME("current"), CTL(stats_chunks_current)}, + {NAME("total"), CTL(stats_chunks_total)}, + {NAME("high"), CTL(stats_chunks_high)} +}; + +static const ctl_node_t stats_huge_node[] = { + {NAME("allocated"), CTL(stats_huge_allocated)}, + {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, + {NAME("ndalloc"), CTL(stats_huge_ndalloc)} +}; + +static const ctl_node_t stats_arenas_i_small_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} +}; + +static const ctl_node_t stats_arenas_i_large_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} +}; + +static const ctl_node_t stats_arenas_i_bins_j_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, + {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, + {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, + {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} +}; +static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { + {NAME(""), CHILD(stats_arenas_i_bins_j)} +}; + +static const ctl_node_t stats_arenas_i_bins_node[] = { + {INDEX(stats_arenas_i_bins_j)} +}; + +static const ctl_node_t stats_arenas_i_lruns_j_node[] = { + {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, + {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} +}; +static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { + {NAME(""), CHILD(stats_arenas_i_lruns_j)} +}; + +static const ctl_node_t stats_arenas_i_lruns_node[] = { + {INDEX(stats_arenas_i_lruns_j)} +}; + +static const ctl_node_t stats_arenas_i_node[] = { + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("npurge"), CTL(stats_arenas_i_npurge)}, + {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, + {NAME("purged"), CTL(stats_arenas_i_purged)}, + {NAME("small"), CHILD(stats_arenas_i_small)}, + {NAME("large"), CHILD(stats_arenas_i_large)}, + {NAME("bins"), CHILD(stats_arenas_i_bins)}, + {NAME("lruns"), 
CHILD(stats_arenas_i_lruns)} +}; +static const ctl_node_t super_stats_arenas_i_node[] = { + {NAME(""), CHILD(stats_arenas_i)} +}; + +static const ctl_node_t stats_arenas_node[] = { + {INDEX(stats_arenas_i)} +}; + +static const ctl_node_t stats_node[] = { + {NAME("cactive"), CTL(stats_cactive)}, + {NAME("allocated"), CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("chunks"), CHILD(stats_chunks)}, + {NAME("huge"), CHILD(stats_huge)}, + {NAME("arenas"), CHILD(stats_arenas)} +}; + +static const ctl_node_t root_node[] = { + {NAME("version"), CTL(version)}, + {NAME("epoch"), CTL(epoch)}, + {NAME("thread"), CHILD(thread)}, + {NAME("config"), CHILD(config)}, + {NAME("opt"), CHILD(opt)}, + {NAME("arenas"), CHILD(arenas)}, + {NAME("prof"), CHILD(prof)}, + {NAME("stats"), CHILD(stats)} +}; +static const ctl_node_t super_root_node[] = { + {NAME(""), CHILD(root)} +}; + +#undef NAME +#undef CHILD +#undef CTL +#undef INDEX + +/******************************************************************************/ + +static bool +ctl_arena_init(ctl_arena_stats_t *astats) +{ + + if (astats->lstats == NULL) { + astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (astats->lstats == NULL) + return (true); + } + + return (false); +} + +static void +ctl_arena_clear(ctl_arena_stats_t *astats) +{ + + astats->pactive = 0; + astats->pdirty = 0; + if (config_stats) { + memset(&astats->astats, 0, sizeof(arena_stats_t)); + astats->allocated_small = 0; + astats->nmalloc_small = 0; + astats->ndalloc_small = 0; + astats->nrequests_small = 0; + memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); + memset(astats->lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); + } +} + +static void +ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) +{ + unsigned i; + + arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, + &cstats->astats, cstats->bstats, cstats->lstats); + + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].allocated; + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } +} + +static void +ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) +{ + unsigned i; + + sstats->pactive += astats->pactive; + sstats->pdirty += astats->pdirty; + + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; + + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; + + sstats->astats.allocated_large += astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += astats->astats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } + + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].allocated += astats->bstats[i].allocated; + sstats->bstats[i].nmalloc += 
astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += astats->bstats[i].nrequests; + if (config_tcache) { + sstats->bstats[i].nfills += astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; + } +} + +static void +ctl_arena_refresh(arena_t *arena, unsigned i) +{ + ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; + ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas]; + + ctl_arena_clear(astats); + + sstats->nthreads += astats->nthreads; + if (config_stats) { + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); + } else { + astats->pactive += arena->nactive; + astats->pdirty += arena->ndirty; + /* Merge into sum stats as well. */ + sstats->pactive += arena->nactive; + sstats->pdirty += arena->ndirty; + } +} + +static void +ctl_refresh(void) +{ + unsigned i; + arena_t *tarenas[narenas]; + + if (config_stats) { + malloc_mutex_lock(&chunks_mtx); + ctl_stats.chunks.current = stats_chunks.curchunks; + ctl_stats.chunks.total = stats_chunks.nchunks; + ctl_stats.chunks.high = stats_chunks.highchunks; + malloc_mutex_unlock(&chunks_mtx); + + malloc_mutex_lock(&huge_mtx); + ctl_stats.huge.allocated = huge_allocated; + ctl_stats.huge.nmalloc = huge_nmalloc; + ctl_stats.huge.ndalloc = huge_ndalloc; + malloc_mutex_unlock(&huge_mtx); + } + + /* + * Clear sum stats, since they will be merged into by + * ctl_arena_refresh(). + */ + ctl_stats.arenas[narenas].nthreads = 0; + ctl_arena_clear(&ctl_stats.arenas[narenas]); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + else + ctl_stats.arenas[i].nthreads = 0; + } + malloc_mutex_unlock(&arenas_lock); + for (i = 0; i < narenas; i++) { + bool initialized = (tarenas[i] != NULL); + + ctl_stats.arenas[i].initialized = initialized; + if (initialized) + ctl_arena_refresh(tarenas[i], i); + } + + if (config_stats) { + ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small + + ctl_stats.arenas[narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = (ctl_stats.arenas[narenas].pactive << + LG_PAGE) + ctl_stats.huge.allocated; + ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + } + + ctl_epoch++; +} + +static bool +ctl_init(void) +{ + bool ret; + + malloc_mutex_lock(&ctl_mtx); + if (ctl_initialized == false) { + /* + * Allocate space for one extra arena stats element, which + * contains summed stats across all arenas. + */ + ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( + (narenas + 1) * sizeof(ctl_arena_stats_t)); + if (ctl_stats.arenas == NULL) { + ret = true; + goto label_return; + } + memset(ctl_stats.arenas, 0, (narenas + 1) * + sizeof(ctl_arena_stats_t)); + + /* + * Initialize all stats structures, regardless of whether they + * ever get used. Lazy initialization would allow errors to + * cause inconsistent state to be viewable by the application. 
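From the application side, the epoch counter maintained by ctl_refresh() is what keeps the statistics consistent: the cached ctl_stats snapshot is only recomputed when a caller writes to "epoch". A typical monitoring loop therefore bumps the epoch before reading counters, roughly as below (declarations are in malloc_np.h on FreeBSD; the stats names are only usable when the allocator was built with statistics support):

#include <stdio.h>
#include <stdint.h>
#include <malloc_np.h>

int
main(void)
{
    uint64_t epoch = 1;
    size_t allocated, sz = sizeof(allocated);

    // Writing any value to "epoch" makes ctl_refresh() repopulate the stats.
    mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch));
    if (mallctl("stats.allocated", &allocated, &sz, NULL, 0) == 0)
        printf("allocated: %zu bytes\n", allocated);
    else
        printf("statistics not available in this build\n");
    return 0;
}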
+ */ + if (config_stats) { + unsigned i; + for (i = 0; i <= narenas; i++) { + if (ctl_arena_init(&ctl_stats.arenas[i])) { + ret = true; + goto label_return; + } + } + } + ctl_stats.arenas[narenas].initialized = true; + + ctl_epoch = 0; + ctl_refresh(); + ctl_initialized = true; + } + + ret = false; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, + size_t *depthp) +{ + int ret; + const char *elm, *tdot, *dot; + size_t elen, i, j; + const ctl_node_t *node; + + elm = name; + /* Equivalent to strchrnul(). */ + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0'); + elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); + if (elen == 0) { + ret = ENOENT; + goto label_return; + } + node = super_root_node; + for (i = 0; i < *depthp; i++) { + assert(node->named); + assert(node->u.named.nchildren > 0); + if (node->u.named.children[0].named) { + const ctl_node_t *pnode = node; + + /* Children are named. */ + for (j = 0; j < node->u.named.nchildren; j++) { + const ctl_node_t *child = + &node->u.named.children[j]; + if (strlen(child->u.named.name) == elen + && strncmp(elm, child->u.named.name, + elen) == 0) { + node = child; + if (nodesp != NULL) + nodesp[i] = node; + mibp[i] = j; + break; + } + } + if (node == pnode) { + ret = ENOENT; + goto label_return; + } + } else { + uintmax_t index; + const ctl_node_t *inode; + + /* Children are indexed. */ + index = malloc_strtoumax(elm, NULL, 10); + if (index == UINTMAX_MAX || index > SIZE_T_MAX) { + ret = ENOENT; + goto label_return; + } + + inode = &node->u.named.children[0]; + node = inode->u.indexed.index(mibp, *depthp, + (size_t)index); + if (node == NULL) { + ret = ENOENT; + goto label_return; + } + + if (nodesp != NULL) + nodesp[i] = node; + mibp[i] = (size_t)index; + } + + if (node->ctl != NULL) { + /* Terminal node. */ + if (*dot != '\0') { + /* + * The name contains more elements than are + * in this path through the tree. + */ + ret = ENOENT; + goto label_return; + } + /* Complete lookup successful. */ + *depthp = i + 1; + break; + } + + /* Update elm. */ + if (*dot == '\0') { + /* No more elements. */ + ret = ENOENT; + goto label_return; + } + elm = &dot[1]; + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : + strchr(elm, '\0'); + elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); + } + + ret = 0; +label_return: + return (ret); +} + +int +ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + int ret; + size_t depth; + ctl_node_t const *nodes[CTL_MAX_DEPTH]; + size_t mib[CTL_MAX_DEPTH]; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto label_return; + } + + depth = CTL_MAX_DEPTH; + ret = ctl_lookup(name, nodes, mib, &depth); + if (ret != 0) + goto label_return; + + if (nodes[depth-1]->ctl == NULL) { + /* The name refers to a partial path through the ctl tree. 
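The name walk that ctl_lookup() performs on every string lookup is exactly what the MIB interface lets callers skip: translate a dotted name once, patch the indexed component, and issue repeated queries by MIB. A usage sketch against the "arenas.bin.<i>.size" nodes defined above (malloc_np.h provides the declarations on FreeBSD):

#include <stdio.h>
#include <stddef.h>
#include <malloc_np.h>

int
main(void)
{
    size_t mib[4], miblen = 4;
    size_t reg_size, sz = sizeof(reg_size);

    // "arenas" and "bin" are named nodes, "0" selects the indexed node,
    // and "size" is the terminal ctl function.
    if (mallctlnametomib("arenas.bin.0.size", mib, &miblen) != 0)
        return 1;
    for (size_t i = 0; i < 3; i++) {
        mib[2] = i;                        // reuse the MIB for bins 0..2
        if (mallctlbymib(mib, miblen, &reg_size, &sz, NULL, 0) == 0)
            printf("bin %zu: region size %zu\n", i, reg_size);
    }
    return 0;
}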
*/ + ret = ENOENT; + goto label_return; + } + + ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); +label_return: + return(ret); +} + +int +ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) +{ + int ret; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookup(name, NULL, mibp, miblenp); +label_return: + return(ret); +} + +int +ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + const ctl_node_t *node; + size_t i; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto label_return; + } + + /* Iterate down the tree. */ + node = super_root_node; + for (i = 0; i < miblen; i++) { + if (node->u.named.children[0].named) { + /* Children are named. */ + if (node->u.named.nchildren <= mib[i]) { + ret = ENOENT; + goto label_return; + } + node = &node->u.named.children[mib[i]]; + } else { + const ctl_node_t *inode; + + /* Indexed element. */ + inode = &node->u.named.children[0]; + node = inode->u.indexed.index(mib, miblen, mib[i]); + if (node == NULL) { + ret = ENOENT; + goto label_return; + } + } + } + + /* Call the ctl function. */ + if (node->ctl == NULL) { + /* Partial MIB. */ + ret = ENOENT; + goto label_return; + } + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + +label_return: + return(ret); +} + +bool +ctl_boot(void) +{ + + if (malloc_mutex_init(&ctl_mtx)) + return (true); + + ctl_initialized = false; + + return (false); +} + +/******************************************************************************/ +/* *_ctl() functions. */ + +#define READONLY() do { \ + if (newp != NULL || newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + +#define WRITEONLY() do { \ + if (oldp != NULL || oldlenp != NULL) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + +#define VOID() do { \ + READONLY(); \ + WRITEONLY(); \ +} while (0) + +#define READ(v, t) do { \ + if (oldp != NULL && oldlenp != NULL) { \ + if (*oldlenp != sizeof(t)) { \ + size_t copylen = (sizeof(t) <= *oldlenp) \ + ? sizeof(t) : *oldlenp; \ + memcpy(oldp, (void *)&v, copylen); \ + ret = EINVAL; \ + goto label_return; \ + } else \ + *(t *)oldp = v; \ + } \ +} while (0) + +#define WRITE(v, t) do { \ + if (newp != NULL) { \ + if (newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + v = *(t *)newp; \ + } \ +} while (0) + +/* + * There's a lot of code duplication in the following macros due to limitations + * in how nested cpp macros are expanded. 
+ */ +#define CTL_RO_CLGEN(c, l, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + if (l) \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + if (l) \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +#define CTL_RO_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +#define CTL_RO_GEN(n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +/* + * ctl_mtx is not acquired, under the assumption that no pertinent data will + * mutate during the call. + */ +#define CTL_RO_NL_CGEN(c, n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + if ((c) == false) \ + return (ENOENT); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + +#define CTL_RO_NL_GEN(n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + +#define CTL_RO_BOOL_CONFIG_GEN(n) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + bool oldval; \ + \ + READONLY(); \ + oldval = n; \ + READ(oldval, bool); \ + \ + ret = 0; \ +label_return: \ + return (ret); \ +} + +CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) + +static int +epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + uint64_t newval; + + malloc_mutex_lock(&ctl_mtx); + newval = 0; + WRITE(newval, uint64_t); + if (newval != 0) + ctl_refresh(); + READ(ctl_epoch, uint64_t); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_tcache == false) + return (ENOENT); + + oldval = tcache_enabled_get(); + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + tcache_enabled_set(*(bool *)newp); + } + READ(oldval, bool); + +label_return: + ret = 0; + return (ret); +} + +static int +thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (config_tcache == false) + return (ENOENT); + + VOID(); + + tcache_flush(); + + ret = 0; +label_return: + return (ret); +} + +static int +thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t 
newlen) +{ + int ret; + unsigned newind, oldind; + + newind = oldind = choose_arena(NULL)->ind; + WRITE(newind, unsigned); + READ(oldind, unsigned); + if (newind != oldind) { + arena_t *arena; + + if (newind >= narenas) { + /* New arena index is out of range. */ + ret = EFAULT; + goto label_return; + } + + /* Initialize arena if necessary. */ + malloc_mutex_lock(&arenas_lock); + if ((arena = arenas[newind]) == NULL && (arena = + arenas_extend(newind)) == NULL) { + malloc_mutex_unlock(&arenas_lock); + ret = EAGAIN; + goto label_return; + } + assert(arena == arenas[newind]); + arenas[oldind]->nthreads--; + arenas[newind]->nthreads++; + malloc_mutex_unlock(&arenas_lock); + + /* Set new arena association. */ + if (config_tcache) { + tcache_t *tcache; + if ((uintptr_t)(tcache = *tcache_tsd_get()) > + (uintptr_t)TCACHE_STATE_MAX) { + tcache_arena_dissociate(tcache); + tcache_arena_associate(tcache, arena); + } + } + arenas_tsd_set(&arena); + } + + ret = 0; +label_return: + return (ret); +} + +CTL_RO_NL_CGEN(config_stats, thread_allocated, + thread_allocated_tsd_get()->allocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_allocatedp, + &thread_allocated_tsd_get()->allocated, uint64_t *) +CTL_RO_NL_CGEN(config_stats, thread_deallocated, + thread_allocated_tsd_get()->deallocated, uint64_t) +CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, + &thread_allocated_tsd_get()->deallocated, uint64_t *) + +/******************************************************************************/ + +CTL_RO_BOOL_CONFIG_GEN(config_debug) +CTL_RO_BOOL_CONFIG_GEN(config_dss) +CTL_RO_BOOL_CONFIG_GEN(config_fill) +CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) +CTL_RO_BOOL_CONFIG_GEN(config_munmap) +CTL_RO_BOOL_CONFIG_GEN(config_prof) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) +CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) +CTL_RO_BOOL_CONFIG_GEN(config_stats) +CTL_RO_BOOL_CONFIG_GEN(config_tcache) +CTL_RO_BOOL_CONFIG_GEN(config_tls) +CTL_RO_BOOL_CONFIG_GEN(config_utrace) +CTL_RO_BOOL_CONFIG_GEN(config_valgrind) +CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) + +/******************************************************************************/ + +CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) +CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) +CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool) +CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) +CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) +CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) +CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) +CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool) +CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) +CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) +CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) + +/******************************************************************************/ + +CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +const ctl_node_t * +arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > NBINS) + return (NULL); + return (super_arenas_bin_i_node); +} + +CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t) +const ctl_node_t * +arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +{ + + if (i > nlclasses) + return (NULL); + return (super_arenas_lrun_i_node); +} + +CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned) + +static int +arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned nread, i; + + malloc_mutex_lock(&ctl_mtx); + READONLY(); + if (*oldlenp != narenas * sizeof(bool)) { + ret = EINVAL; + nread = (*oldlenp < narenas * sizeof(bool)) + ? (*oldlenp / sizeof(bool)) : narenas; + } else { + ret = 0; + nread = narenas; + } + + for (i = 0; i < nread; i++) + ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; + +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) +CTL_RO_NL_GEN(arenas_page, PAGE, size_t) +CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) +CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) +CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) + +static int +arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + unsigned arena; + + WRITEONLY(); + arena = UINT_MAX; + WRITE(arena, unsigned); + if (newp != NULL && arena >= narenas) { + ret = EFAULT; + goto label_return; + } else { + arena_t *tarenas[narenas]; + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); + malloc_mutex_unlock(&arenas_lock); + + if (arena == UINT_MAX) { + unsigned i; + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge_all(tarenas[i]); + } + } else { + assert(arena < narenas); + if (tarenas[arena] != NULL) + arena_purge_all(tarenas[arena]); + } + } + + ret = 0; +label_return: + return (ret); +} + +/******************************************************************************/ + +static int +prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (config_prof == false) + return (ENOENT); + + malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ + oldval = opt_prof_active; + if (newp != NULL) { + /* + * The memory barriers will tend to make opt_prof_active + * propagate faster on systems with weak memory ordering. 
+ */ + mb_write(); + WRITE(opt_prof_active, bool); + mb_write(); + } + READ(oldval, bool); + + ret = 0; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + const char *filename = NULL; + + if (config_prof == false) + return (ENOENT); + + WRITEONLY(); + WRITE(filename, const char *); + + if (prof_mdump(filename)) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return (ret); +} + +CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) + +/******************************************************************************/ + +CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current, + size_t) +CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t) +CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t) +CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t) +CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, + ctl_stats.arenas[mib[2]].allocated_small, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, + ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, + ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, + ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, + ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, + ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, + ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, + ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) + +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated, + ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, + ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, + ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, + ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) + +const ctl_node_t * +stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + if (j > NBINS) + return (NULL); + return (super_stats_arenas_i_bins_j_node); +} + +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc, + 
ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, + ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) + +const ctl_node_t * +stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + if (j > nlclasses) + return (NULL); + return (super_stats_arenas_i_lruns_j_node); +} + +CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, + ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, + ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, + ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_purged, + ctl_stats.arenas[mib[2]].astats.purged, uint64_t) + +const ctl_node_t * +stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (ctl_stats.arenas[i].initialized == false) { + ret = NULL; + goto label_return; + } + + ret = super_stats_arenas_i_node; +label_return: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) diff --git a/contrib/jemalloc/src/extent.c b/contrib/jemalloc/src/extent.c new file mode 100644 index 000000000000..8c09b486ed81 --- /dev/null +++ b/contrib/jemalloc/src/extent.c @@ -0,0 +1,39 @@ +#define JEMALLOC_EXTENT_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ + +static inline int +extent_szad_comp(extent_node_t *a, extent_node_t *b) +{ + int ret; + size_t a_size = a->size; + size_t b_size = b->size; + + ret = (a_size > b_size) - (a_size < b_size); + if (ret == 0) { + uintptr_t a_addr = (uintptr_t)a->addr; + uintptr_t b_addr = (uintptr_t)b->addr; + + ret = (a_addr > b_addr) - (a_addr < b_addr); + } + + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, + extent_szad_comp) + +static inline int +extent_ad_comp(extent_node_t *a, extent_node_t *b) +{ + uintptr_t a_addr = (uintptr_t)a->addr; + uintptr_t b_addr = (uintptr_t)b->addr; + + return ((a_addr > b_addr) - (a_addr < b_addr)); +} + +/* Generate red-black tree functions. 
*/ +rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, + extent_ad_comp) diff --git a/contrib/jemalloc/src/hash.c b/contrib/jemalloc/src/hash.c new file mode 100644 index 000000000000..cfa4da0275cb --- /dev/null +++ b/contrib/jemalloc/src/hash.c @@ -0,0 +1,2 @@ +#define JEMALLOC_HASH_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/contrib/jemalloc/src/huge.c b/contrib/jemalloc/src/huge.c new file mode 100644 index 000000000000..daf0c6228740 --- /dev/null +++ b/contrib/jemalloc/src/huge.c @@ -0,0 +1,306 @@ +#define JEMALLOC_HUGE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +uint64_t huge_nmalloc; +uint64_t huge_ndalloc; +size_t huge_allocated; + +malloc_mutex_t huge_mtx; + +/******************************************************************************/ + +/* Tree of chunks that are stand-alone huge allocations. */ +static extent_tree_t huge; + +void * +huge_malloc(size_t size, bool zero) +{ + + return (huge_palloc(size, chunksize, zero)); +} + +void * +huge_palloc(size_t size, size_t alignment, bool zero) +{ + void *ret; + size_t csize; + extent_node_t *node; + + /* Allocate one or more contiguous chunks for this request. */ + + csize = CHUNK_CEILING(size); + if (csize == 0) { + /* size is large enough to cause size_t wrap-around. */ + return (NULL); + } + + /* Allocate an extent node with which to track the chunk. */ + node = base_node_alloc(); + if (node == NULL) + return (NULL); + + ret = chunk_alloc(csize, alignment, false, &zero); + if (ret == NULL) { + base_node_dealloc(node); + return (NULL); + } + + /* Insert node into huge. */ + node->addr = ret; + node->size = csize; + + malloc_mutex_lock(&huge_mtx); + extent_tree_ad_insert(&huge, node); + if (config_stats) { + stats_cactive_add(csize); + huge_nmalloc++; + huge_allocated += csize; + } + malloc_mutex_unlock(&huge_mtx); + + if (config_fill && zero == false) { + if (opt_junk) + memset(ret, 0xa5, csize); + else if (opt_zero) + memset(ret, 0, csize); + } + + return (ret); +} + +void * +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) +{ + + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize > arena_maxclass + && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + assert(CHUNK_CEILING(oldsize) == oldsize); + if (config_fill && opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), 0x5a, + oldsize - size); + } + return (ptr); + } + + /* Reallocation would require a move. */ + return (NULL); +} + +void * +huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = huge_ralloc_no_move(ptr, oldsize, size, extra); + if (ret != NULL) + return (ret); + + /* + * size and oldsize are different enough that we need to use a + * different size class. In that case, fall back to allocating new + * space and copying. + */ + if (alignment > chunksize) + ret = huge_palloc(size + extra, alignment, zero); + else + ret = huge_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. 
*/ + if (alignment > chunksize) + ret = huge_palloc(size, alignment, zero); + else + ret = huge_malloc(size, zero); + + if (ret == NULL) + return (NULL); + } + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + + /* + * Use mremap(2) if this is a huge-->huge reallocation, and neither the + * source nor the destination are in dss. + */ +#ifdef JEMALLOC_MREMAP_FIXED + if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr) + == false && chunk_in_dss(ret) == false))) { + size_t newsize = huge_salloc(ret); + + /* + * Remove ptr from the tree of huge allocations before + * performing the remap operation, in order to avoid the + * possibility of another thread acquiring that mapping before + * this one removes it from the tree. + */ + huge_dalloc(ptr, false); + if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, + ret) == MAP_FAILED) { + /* + * Assuming no chunk management bugs in the allocator, + * the only documented way an error can occur here is + * if the application changed the map type for a + * portion of the old allocation. This is firmly in + * undefined behavior territory, so write a diagnostic + * message, and optionally abort. + */ + char buf[BUFERROR_BUF]; + + buferror(errno, buf, sizeof(buf)); + malloc_printf(": Error in mremap(): %s\n", + buf); + if (opt_abort) + abort(); + memcpy(ret, ptr, copysize); + chunk_dealloc_mmap(ptr, oldsize); + } + } else +#endif + { + memcpy(ret, ptr, copysize); + iqalloc(ptr); + } + return (ret); +} + +void +huge_dalloc(void *ptr, bool unmap) +{ + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + extent_tree_ad_remove(&huge, node); + + if (config_stats) { + stats_cactive_sub(node->size); + huge_ndalloc++; + huge_allocated -= node->size; + } + + malloc_mutex_unlock(&huge_mtx); + + if (unmap && config_fill && config_dss && opt_junk) + memset(node->addr, 0x5a, node->size); + + chunk_dealloc(node->addr, node->size, unmap); + + base_node_dealloc(node); +} + +size_t +huge_salloc(const void *ptr) +{ + size_t ret; + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + ret = node->size; + + malloc_mutex_unlock(&huge_mtx); + + return (ret); +} + +prof_ctx_t * +huge_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + ret = node->prof_ctx; + + malloc_mutex_unlock(&huge_mtx); + + return (ret); +} + +void +huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + node->prof_ctx = ctx; + + malloc_mutex_unlock(&huge_mtx); +} + +bool +huge_boot(void) +{ + + /* Initialize chunks data. 
*/ + if (malloc_mutex_init(&huge_mtx)) + return (true); + extent_tree_ad_new(&huge); + + if (config_stats) { + huge_nmalloc = 0; + huge_ndalloc = 0; + huge_allocated = 0; + } + + return (false); +} + +void +huge_prefork(void) +{ + + malloc_mutex_prefork(&huge_mtx); +} + +void +huge_postfork_parent(void) +{ + + malloc_mutex_postfork_parent(&huge_mtx); +} + +void +huge_postfork_child(void) +{ + + malloc_mutex_postfork_child(&huge_mtx); +} diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c new file mode 100644 index 000000000000..73fad298ab52 --- /dev/null +++ b/contrib/jemalloc/src/jemalloc.c @@ -0,0 +1,1733 @@ +#define JEMALLOC_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_tsd_data(, arenas, arena_t *, NULL) +malloc_tsd_data(, thread_allocated, thread_allocated_t, + THREAD_ALLOCATED_INITIALIZER) + +const char *__malloc_options_1_0; +__sym_compat(_malloc_options, __malloc_options_1_0, FBSD_1.0); + +/* Runtime configuration options. */ +const char *je_malloc_conf JEMALLOC_ATTR(visibility("default")); +#ifdef JEMALLOC_DEBUG +bool opt_abort = true; +# ifdef JEMALLOC_FILL +bool opt_junk = true; +# else +bool opt_junk = false; +# endif +#else +bool opt_abort = false; +bool opt_junk = false; +#endif +size_t opt_quarantine = ZU(0); +bool opt_redzone = false; +bool opt_utrace = false; +bool opt_valgrind = false; +bool opt_xmalloc = false; +bool opt_zero = false; +size_t opt_narenas = 0; + +unsigned ncpus; + +malloc_mutex_t arenas_lock; +arena_t **arenas; +unsigned narenas; + +/* Set to true once the allocator has been initialized. */ +static bool malloc_initialized = false; + +#ifdef JEMALLOC_THREADED_INIT +/* Used to let the initializing thread recursively allocate. */ +# define NO_INITIALIZER ((unsigned long)0) +# define INITIALIZER pthread_self() +# define IS_INITIALIZER (malloc_initializer == pthread_self()) +static pthread_t malloc_initializer = NO_INITIALIZER; +#else +# define NO_INITIALIZER false +# define INITIALIZER true +# define IS_INITIALIZER malloc_initializer +static bool malloc_initializer = NO_INITIALIZER; +#endif + +/* Used to avoid initialization races. */ +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; + +typedef struct { + void *p; /* Input pointer (as in realloc(p, s)). */ + size_t s; /* Request size. */ + void *r; /* Result pointer. */ +} malloc_utrace_t; + +#ifdef JEMALLOC_UTRACE +# define UTRACE(a, b, c) do { \ + if (opt_utrace) { \ + malloc_utrace_t ut; \ + ut.p = (a); \ + ut.s = (b); \ + ut.r = (c); \ + utrace(&ut, sizeof(ut)); \ + } \ +} while (0) +#else +# define UTRACE(a, b, c) +#endif + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void stats_print_atexit(void); +static unsigned malloc_ncpus(void); +static bool malloc_conf_next(char const **opts_p, char const **k_p, + size_t *klen_p, char const **v_p, size_t *vlen_p); +static void malloc_conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +static void malloc_conf_init(void); +static bool malloc_init_hard(void); +static int imemalign(void **memptr, size_t alignment, size_t size, + size_t min_alignment); + +/******************************************************************************/ +/* + * Begin miscellaneous support functions. + */ + +/* Create a new arena and insert it into the arenas array at index ind. 
*/ +arena_t * +arenas_extend(unsigned ind) +{ + arena_t *ret; + + ret = (arena_t *)base_alloc(sizeof(arena_t)); + if (ret != NULL && arena_new(ret, ind) == false) { + arenas[ind] = ret; + return (ret); + } + /* Only reached if there is an OOM error. */ + + /* + * OOM here is quite inconvenient to propagate, since dealing with it + * would require a check for failure in the fast path. Instead, punt + * by using arenas[0]. In practice, this is an extremely unlikely + * failure. + */ + malloc_write(": Error initializing arena\n"); + if (opt_abort) + abort(); + + return (arenas[0]); +} + +/* Slow path, called only by choose_arena(). */ +arena_t * +choose_arena_hard(void) +{ + arena_t *ret; + + if (narenas > 1) { + unsigned i, choose, first_null; + + choose = 0; + first_null = narenas; + malloc_mutex_lock(&arenas_lock); + assert(arenas[0] != NULL); + for (i = 1; i < narenas; i++) { + if (arenas[i] != NULL) { + /* + * Choose the first arena that has the lowest + * number of threads assigned to it. + */ + if (arenas[i]->nthreads < + arenas[choose]->nthreads) + choose = i; + } else if (first_null == narenas) { + /* + * Record the index of the first uninitialized + * arena, in case all extant arenas are in use. + * + * NB: It is possible for there to be + * discontinuities in terms of initialized + * versus uninitialized arenas, due to the + * "thread.arena" mallctl. + */ + first_null = i; + } + } + + if (arenas[choose]->nthreads == 0 || first_null == narenas) { + /* + * Use an unloaded arena, or the least loaded arena if + * all arenas are already initialized. + */ + ret = arenas[choose]; + } else { + /* Initialize a new arena. */ + ret = arenas_extend(first_null); + } + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } else { + ret = arenas[0]; + malloc_mutex_lock(&arenas_lock); + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } + + arenas_tsd_set(&ret); + + return (ret); +} + +static void +stats_print_atexit(void) +{ + + if (config_tcache && config_stats) { + unsigned i; + + /* + * Merge stats from extant threads. This is racy, since + * individual threads do not lock when recording tcache stats + * events. As a consequence, the final stats may be slightly + * out of date by the time they are reported, if other threads + * continue to allocate. + */ + for (i = 0; i < narenas; i++) { + arena_t *arena = arenas[i]; + if (arena != NULL) { + tcache_t *tcache; + + /* + * tcache_stats_merge() locks bins, so if any + * code is introduced that acquires both arena + * and bin locks in the opposite order, + * deadlocks may result. + */ + malloc_mutex_lock(&arena->lock); + ql_foreach(tcache, &arena->tcache_ql, link) { + tcache_stats_merge(tcache, arena); + } + malloc_mutex_unlock(&arena->lock); + } + } + } + je_malloc_stats_print(NULL, NULL, NULL); +} + +/* + * End miscellaneous support functions. + */ +/******************************************************************************/ +/* + * Begin initialization functions. + */ + +static unsigned +malloc_ncpus(void) +{ + unsigned ret; + long result; + + result = sysconf(_SC_NPROCESSORS_ONLN); + if (result == -1) { + /* Error. 
*/ + ret = 1; + } + ret = (unsigned)result; + + return (ret); +} + +void +arenas_cleanup(void *arg) +{ + arena_t *arena = *(arena_t **)arg; + + malloc_mutex_lock(&arenas_lock); + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); +} + +static inline bool +malloc_init(void) +{ + + if (malloc_initialized == false) + return (malloc_init_hard()); + + return (false); +} + +static bool +malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) +{ + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; accept == false;) { + switch (*opts) { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write(": Conf string ends " + "with key\n"); + } + return (true); + default: + malloc_write(": Malformed conf string\n"); + return (true); + } + } + + for (accept = false; accept == false;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the next time + * this function is called, it will assume that end of + * input has been cleanly reached if no input remains, + * but we have optimistically already consumed the + * comma if one exists. + */ + if (*opts == '\0') { + malloc_write(": Conf string ends " + "with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return (false); +} + +static void +malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, + size_t vlen) +{ + + malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, + (int)vlen, v); +} + +static void +malloc_conf_init(void) +{ + unsigned i; + char buf[PATH_MAX + 1]; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < 3; i++) { + /* Get runtime configuration. */ + switch (i) { + case 0: + if (je_malloc_conf != NULL) { + /* + * Use options that were compiled into the + * program. + */ + opts = je_malloc_conf; + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 1: { + int linklen; + const char *linkname = +#ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +#else + "/etc/malloc.conf" +#endif + ; + + if ((linklen = readlink(linkname, buf, + sizeof(buf) - 1)) != -1) { + /* + * Use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + buf[linklen] = '\0'; + opts = buf; + } else { + /* No configuration specified. 
*/ + buf[0] = '\0'; + opts = buf; + } + break; + } case 2: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if (issetugid() == 0 && (opts = getenv(envname)) != + NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the MALLOC_CONF environment + * variable. + */ + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + } default: + /* NOTREACHED */ + assert(false); + buf[0] = '\0'; + opts = buf; + } + + while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, + &vlen) == false) { +#define CONF_HANDLE_BOOL_HIT(o, n, hit) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + if (strncmp("true", v, vlen) == 0 && \ + vlen == sizeof("true")-1) \ + o = true; \ + else if (strncmp("false", v, vlen) == \ + 0 && vlen == sizeof("false")-1) \ + o = false; \ + else { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } \ + hit = true; \ + } else \ + hit = false; +#define CONF_HANDLE_BOOL(o, n) { \ + bool hit; \ + CONF_HANDLE_BOOL_HIT(o, n, hit); \ + if (hit) \ + continue; \ +} +#define CONF_HANDLE_SIZE_T(o, n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + uintmax_t um; \ + char *end; \ + \ + errno = 0; \ + um = malloc_strtoumax(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (um < min || um > max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + o = um; \ + continue; \ + } +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + long l; \ + char *end; \ + \ + errno = 0; \ + l = strtol(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (l < (ssize_t)min || l > \ + (ssize_t)max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + o = l; \ + continue; \ + } +#define CONF_HANDLE_CHAR_P(o, n, d) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + size_t cpylen = (vlen <= \ + sizeof(o)-1) ? vlen : \ + sizeof(o)-1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ + continue; \ + } + + CONF_HANDLE_BOOL(opt_abort, abort) + /* + * Chunks always require at least one header page, plus + * one data page in the absence of redzones, or three + * pages in the presence of redzones. In order to + * simplify options processing, fix the limit based on + * config_fill. + */ + CONF_HANDLE_SIZE_T(opt_lg_chunk, lg_chunk, LG_PAGE + + (config_fill ? 
2 : 1), (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(opt_narenas, narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, lg_dirty_mult, + -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_stats_print, stats_print) + if (config_fill) { + CONF_HANDLE_BOOL(opt_junk, junk) + CONF_HANDLE_SIZE_T(opt_quarantine, quarantine, + 0, SIZE_T_MAX) + CONF_HANDLE_BOOL(opt_redzone, redzone) + CONF_HANDLE_BOOL(opt_zero, zero) + } + if (config_utrace) { + CONF_HANDLE_BOOL(opt_utrace, utrace) + } + if (config_valgrind) { + bool hit; + CONF_HANDLE_BOOL_HIT(opt_valgrind, + valgrind, hit) + if (config_fill && opt_valgrind && hit) { + opt_junk = false; + opt_zero = false; + if (opt_quarantine == 0) { + opt_quarantine = + JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; + } + opt_redzone = true; + } + if (hit) + continue; + } + if (config_xmalloc) { + CONF_HANDLE_BOOL(opt_xmalloc, xmalloc) + } + if (config_tcache) { + CONF_HANDLE_BOOL(opt_tcache, tcache) + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, + lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) + } + if (config_prof) { + CONF_HANDLE_BOOL(opt_prof, prof) + CONF_HANDLE_CHAR_P(opt_prof_prefix, prof_prefix, + "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, prof_active) + CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, + lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_prof_accum, prof_accum) + CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, + lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_prof_gdump, prof_gdump) + CONF_HANDLE_BOOL(opt_prof_leak, prof_leak) + } + malloc_conf_error("Invalid conf pair", k, klen, v, + vlen); +#undef CONF_HANDLE_BOOL +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + } + } +} + +static bool +malloc_init_hard(void) +{ + arena_t *init_arenas[1]; + + malloc_mutex_lock(&init_lock); + if (malloc_initialized || IS_INITIALIZER) { + /* + * Another thread initialized the allocator before this one + * acquired init_lock, or this thread is the initializing + * thread, and it is recursively allocating. + */ + malloc_mutex_unlock(&init_lock); + return (false); + } +#ifdef JEMALLOC_THREADED_INIT + if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) { + /* Busy-wait until the initializing thread completes. */ + do { + malloc_mutex_unlock(&init_lock); + CPU_SPINWAIT; + malloc_mutex_lock(&init_lock); + } while (malloc_initialized == false); + malloc_mutex_unlock(&init_lock); + return (false); + } +#endif + malloc_initializer = INITIALIZER; + + malloc_tsd_boot(); + if (config_prof) + prof_boot0(); + + malloc_conf_init(); + +#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE)) + /* Register fork handlers. */ + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, + jemalloc_postfork_child) != 0) { + malloc_write(": Error in pthread_atfork()\n"); + if (opt_abort) + abort(); + } +#endif + + if (opt_stats_print) { + /* Print statistics at exit. 
*/ + if (atexit(stats_print_atexit) != 0) { + malloc_write(": Error in atexit()\n"); + if (opt_abort) + abort(); + } + } + + if (base_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (chunk_boot0()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (ctl_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_prof) + prof_boot1(); + + arena_boot(); + + if (config_tcache && tcache_boot0()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (huge_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (malloc_mutex_init(&arenas_lock)) + return (true); + + /* + * Create enough scaffolding to allow recursive allocation in + * malloc_ncpus(). + */ + narenas = 1; + arenas = init_arenas; + memset(arenas, 0, sizeof(arena_t *) * narenas); + + /* + * Initialize one arena here. The rest are lazily created in + * choose_arena_hard(). + */ + arenas_extend(0); + if (arenas[0] == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + /* Initialize allocation counters before any allocations can occur. */ + if (config_stats && thread_allocated_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (arenas_tsd_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_tcache && tcache_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_fill && quarantine_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + /* Get number of CPUs. */ + malloc_mutex_unlock(&init_lock); + ncpus = malloc_ncpus(); + malloc_mutex_lock(&init_lock); + + if (chunk_boot1()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (mutex_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (opt_narenas == 0) { + /* + * For SMP systems, create more than one arena per CPU by + * default. + */ + if (ncpus > 1) + opt_narenas = ncpus << 2; + else + opt_narenas = 1; + } + narenas = opt_narenas; + /* + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. + */ + if (narenas > chunksize / sizeof(arena_t *)) { + narenas = chunksize / sizeof(arena_t *); + malloc_printf(": Reducing narenas to limit (%d)\n", + narenas); + } + + /* Allocate and initialize arenas. */ + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); + if (arenas == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* + * Zero the array. In practice, this should always be pre-zeroed, + * since it was just mmap()ed, but let's be sure. + */ + memset(arenas, 0, sizeof(arena_t *) * narenas); + /* Copy the pointer to the one arena that was already initialized. */ + arenas[0] = init_arenas[0]; + + malloc_initialized = true; + malloc_mutex_unlock(&init_lock); + return (false); +} + +/* + * End initialization functions. + */ +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. 
+ */ + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_malloc(size_t size) +{ + void *ret; + size_t usize; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); + + if (malloc_init()) { + ret = NULL; + goto label_oom; + } + + if (size == 0) + size = 1; + + if (config_prof && opt_prof) { + usize = s2u(size); + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { + ret = NULL; + goto label_oom; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = imalloc(size); + } else { + if (config_stats || (config_valgrind && opt_valgrind)) + usize = s2u(size); + ret = imalloc(size); + } + +label_oom: + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in malloc(): " + "out of memory\n"); + abort(); + } + errno = ENOMEM; + } + if (config_prof && opt_prof && ret != NULL) + prof_malloc(ret, usize, cnt); + if (config_stats && ret != NULL) { + assert(usize == isalloc(ret, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + return (ret); +} + +JEMALLOC_ATTR(nonnull(1)) +#ifdef JEMALLOC_PROF +/* + * Avoid any uncertainty as to how many backtrace frames to ignore in + * PROF_ALLOC_PREP(). + */ +JEMALLOC_ATTR(noinline) +#endif +static int +imemalign(void **memptr, size_t alignment, size_t size, + size_t min_alignment) +{ + int ret; + size_t usize; + void *result; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); + + assert(min_alignment != 0); + + if (malloc_init()) + result = NULL; + else { + if (size == 0) + size = 1; + + /* Make sure that alignment is a large enough power of 2. 
*/ + if (((alignment - 1) & alignment) != 0 + || (alignment < min_alignment)) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error allocating " + "aligned memory: invalid alignment\n"); + abort(); + } + result = NULL; + ret = EINVAL; + goto label_return; + } + + usize = sa2u(size, alignment); + if (usize == 0) { + result = NULL; + ret = ENOMEM; + goto label_return; + } + + if (config_prof && opt_prof) { + PROF_ALLOC_PREP(2, usize, cnt); + if (cnt == NULL) { + result = NULL; + ret = EINVAL; + } else { + if (prof_promote && (uintptr_t)cnt != + (uintptr_t)1U && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, + alignment) != 0); + result = ipalloc(sa2u(SMALL_MAXCLASS+1, + alignment), alignment, false); + if (result != NULL) { + arena_prof_promoted(result, + usize); + } + } else { + result = ipalloc(usize, alignment, + false); + } + } + } else + result = ipalloc(usize, alignment, false); + } + + if (result == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error allocating aligned " + "memory: out of memory\n"); + abort(); + } + ret = ENOMEM; + goto label_return; + } + + *memptr = result; + ret = 0; + +label_return: + if (config_stats && result != NULL) { + assert(usize == isalloc(result, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + if (config_prof && opt_prof && result != NULL) + prof_malloc(result, usize, cnt); + UTRACE(0, size, result); + return (ret); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +je_posix_memalign(void **memptr, size_t alignment, size_t size) +{ + int ret = imemalign(memptr, alignment, size, sizeof(void *)); + JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, + config_prof), false); + return (ret); +} + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_aligned_alloc(size_t alignment, size_t size) +{ + void *ret; + int err; + + if ((err = imemalign(&ret, alignment, size, 1)) != 0) { + ret = NULL; + errno = err; + } + JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), + false); + return (ret); +} + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_calloc(size_t num, size_t size) +{ + void *ret; + size_t num_size; + size_t usize; + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); + + if (malloc_init()) { + num_size = 0; + ret = NULL; + goto label_return; + } + + num_size = num * size; + if (num_size == 0) { + if (num == 0 || size == 0) + num_size = 1; + else { + ret = NULL; + goto label_return; + } + /* + * Try to avoid division here. We know that it isn't possible to + * overflow during multiplication if neither operand uses any of the + * most significant half of the bits in a size_t. + */ + } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) + && (num_size / size != num)) { + /* size_t overflow. 
*/ + ret = NULL; + goto label_return; + } + + if (config_prof && opt_prof) { + usize = s2u(num_size); + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { + ret = NULL; + goto label_return; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize + <= SMALL_MAXCLASS) { + ret = icalloc(SMALL_MAXCLASS+1); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = icalloc(num_size); + } else { + if (config_stats || (config_valgrind && opt_valgrind)) + usize = s2u(num_size); + ret = icalloc(num_size); + } + +label_return: + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in calloc(): out of " + "memory\n"); + abort(); + } + errno = ENOMEM; + } + + if (config_prof && opt_prof && ret != NULL) + prof_malloc(ret, usize, cnt); + if (config_stats && ret != NULL) { + assert(usize == isalloc(ret, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void * +je_realloc(void *ptr, size_t size) +{ + void *ret; + size_t usize; + size_t old_size = 0; + size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL); + prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL); + + if (size == 0) { + if (ptr != NULL) { + /* realloc(ptr, 0) is equivalent to free(p). */ + if (config_prof) { + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + } else if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } + if (config_prof && opt_prof) { + old_ctx = prof_ctx_get(ptr); + cnt = NULL; + } + iqalloc(ptr); + ret = NULL; + goto label_return; + } else + size = 1; + } + + if (ptr != NULL) { + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof) { + old_size = isalloc(ptr, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(ptr); + } else if (config_stats) { + old_size = isalloc(ptr, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(ptr, false); + old_rzsize = u2rz(old_size); + } + if (config_prof && opt_prof) { + usize = s2u(size); + old_ctx = prof_ctx_get(ptr); + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) { + old_ctx = NULL; + ret = NULL; + goto label_oom; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && + usize <= SMALL_MAXCLASS) { + ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, + false, false); + if (ret != NULL) + arena_prof_promoted(ret, usize); + else + old_ctx = NULL; + } else { + ret = iralloc(ptr, size, 0, 0, false, false); + if (ret == NULL) + old_ctx = NULL; + } + } else { + if (config_stats || (config_valgrind && opt_valgrind)) + usize = s2u(size); + ret = iralloc(ptr, size, 0, 0, false, false); + } + +label_oom: + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in realloc(): " + "out of memory\n"); + abort(); + } + errno = ENOMEM; + } + } else { + /* realloc(NULL, size) is equivalent to malloc(size). 
*/ + if (config_prof && opt_prof) + old_ctx = NULL; + if (malloc_init()) { + if (config_prof && opt_prof) + cnt = NULL; + ret = NULL; + } else { + if (config_prof && opt_prof) { + usize = s2u(size); + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) + ret = NULL; + else { + if (prof_promote && (uintptr_t)cnt != + (uintptr_t)1U && usize <= + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); + if (ret != NULL) { + arena_prof_promoted(ret, + usize); + } + } else + ret = imalloc(size); + } + } else { + if (config_stats || (config_valgrind && + opt_valgrind)) + usize = s2u(size); + ret = imalloc(size); + } + } + + if (ret == NULL) { + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in realloc(): " + "out of memory\n"); + abort(); + } + errno = ENOMEM; + } + } + +label_return: + if (config_prof && opt_prof) + prof_realloc(ret, usize, cnt, old_size, old_ctx); + if (config_stats && ret != NULL) { + thread_allocated_t *ta; + assert(usize == isalloc(ret, config_prof)); + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; + } + UTRACE(ptr, size, ret); + JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false); + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void +je_free(void *ptr) +{ + + UTRACE(ptr, 0, 0); + if (ptr != NULL) { + size_t usize; + size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(malloc_initialized || IS_INITIALIZER); + + if (config_prof && opt_prof) { + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } else if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + } +} + +/* + * End malloc(3)-compatible functions. + */ +/******************************************************************************/ +/* + * Begin non-standard override functions. + */ + +#ifdef JEMALLOC_OVERRIDE_MEMALIGN +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_memalign(size_t alignment, size_t size) +{ + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); + imemalign(&ret, alignment, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); + return (ret); +} +#endif + +#ifdef JEMALLOC_OVERRIDE_VALLOC +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +je_valloc(size_t size) +{ + void *ret JEMALLOC_CC_SILENCE_INIT(NULL); + imemalign(&ret, PAGE, size, 1); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); + return (ret); +} +#endif + +/* + * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has + * #define je_malloc malloc + */ +#define malloc_is_malloc 1 +#define is_malloc_(a) malloc_is_ ## a +#define is_malloc(a) is_malloc_(a) + +#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__)) +/* + * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible + * to inconsistently reference libc's malloc(3)-compatible functions + * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541). + * + * These definitions interpose hooks in glibc. The functions are actually + * passed an extra argument for the caller return address, which will be + * ignored. 
+ */ +JEMALLOC_ATTR(visibility("default")) +void (* const __free_hook)(void *ptr) = je_free; + +JEMALLOC_ATTR(visibility("default")) +void *(* const __malloc_hook)(size_t size) = je_malloc; + +JEMALLOC_ATTR(visibility("default")) +void *(* const __realloc_hook)(void *ptr, size_t size) = je_realloc; + +JEMALLOC_ATTR(visibility("default")) +void *(* const __memalign_hook)(size_t alignment, size_t size) = je_memalign; +#endif + +/* + * End non-standard override functions. + */ +/******************************************************************************/ +/* + * Begin non-standard functions. + */ + +JEMALLOC_ATTR(visibility("default")) +size_t +je_malloc_usable_size(const void *ptr) +{ + size_t ret; + + assert(malloc_initialized || IS_INITIALIZER); + + if (config_ivsalloc) + ret = ivsalloc(ptr, config_prof); + else + ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0; + + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void +je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + + stats_print(write_cb, cbopaque, opts); +} + +JEMALLOC_ATTR(visibility("default")) +int +je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_byname(name, oldp, oldlenp, newp, newlen)); +} + +JEMALLOC_ATTR(visibility("default")) +int +je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_nametomib(name, mibp, miblenp)); +} + +JEMALLOC_ATTR(visibility("default")) +int +je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); +} + +/* + * End non-standard functions. + */ +/******************************************************************************/ +/* + * Begin experimental functions. + */ +#ifdef JEMALLOC_EXPERIMENTAL + +JEMALLOC_INLINE void * +iallocm(size_t usize, size_t alignment, bool zero) +{ + + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); + + if (alignment != 0) + return (ipalloc(usize, alignment, zero)); + else if (zero) + return (icalloc(usize)); + else + return (imalloc(usize)); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +je_allocm(void **ptr, size_t *rsize, size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; + prof_thr_cnt_t *cnt; + + assert(ptr != NULL); + assert(size != 0); + + if (malloc_init()) + goto label_oom; + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + if (usize == 0) + goto label_oom; + + if (config_prof && opt_prof) { + PROF_ALLOC_PREP(1, usize, cnt); + if (cnt == NULL) + goto label_oom; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + SMALL_MAXCLASS) { + size_t usize_promoted = (alignment == 0) ? 
+ s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, + alignment); + assert(usize_promoted != 0); + p = iallocm(usize_promoted, alignment, zero); + if (p == NULL) + goto label_oom; + arena_prof_promoted(p, usize); + } else { + p = iallocm(usize, alignment, zero); + if (p == NULL) + goto label_oom; + } + prof_malloc(p, usize, cnt); + } else { + p = iallocm(usize, alignment, zero); + if (p == NULL) + goto label_oom; + } + if (rsize != NULL) + *rsize = usize; + + *ptr = p; + if (config_stats) { + assert(usize == isalloc(p, config_prof)); + thread_allocated_tsd_get()->allocated += usize; + } + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero); + return (ALLOCM_SUCCESS); +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in allocm(): " + "out of memory\n"); + abort(); + } + *ptr = NULL; + UTRACE(0, size, 0); + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) +{ + void *p, *q; + size_t usize; + size_t old_size; + size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; + bool no_move = flags & ALLOCM_NO_MOVE; + prof_thr_cnt_t *cnt; + + assert(ptr != NULL); + assert(*ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || IS_INITIALIZER); + + p = *ptr; + if (config_prof && opt_prof) { + /* + * usize isn't knowable before iralloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in PROF_ALLOC_PREP() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? s2u(size+extra) : + sa2u(size+extra, alignment); + prof_ctx_t *old_ctx = prof_ctx_get(p); + old_size = isalloc(p, true); + if (config_valgrind && opt_valgrind) + old_rzsize = p2rz(p); + PROF_ALLOC_PREP(1, max_usize, cnt); + if (cnt == NULL) + goto label_oom; + /* + * Use minimum usize to determine whether promotion may happen. + */ + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U + && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) + <= SMALL_MAXCLASS) { + q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 
0 : size+extra - (SMALL_MAXCLASS+1), + alignment, zero, no_move); + if (q == NULL) + goto label_err; + if (max_usize < PAGE) { + usize = max_usize; + arena_prof_promoted(q, usize); + } else + usize = isalloc(q, config_prof); + } else { + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto label_err; + usize = isalloc(q, config_prof); + } + prof_realloc(q, usize, cnt, old_size, old_ctx); + if (rsize != NULL) + *rsize = usize; + } else { + if (config_stats) { + old_size = isalloc(p, false); + if (config_valgrind && opt_valgrind) + old_rzsize = u2rz(old_size); + } else if (config_valgrind && opt_valgrind) { + old_size = isalloc(p, false); + old_rzsize = u2rz(old_size); + } + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto label_err; + if (config_stats) + usize = isalloc(q, config_prof); + if (rsize != NULL) { + if (config_stats == false) + usize = isalloc(q, config_prof); + *rsize = usize; + } + } + + *ptr = q; + if (config_stats) { + thread_allocated_t *ta; + ta = thread_allocated_tsd_get(); + ta->allocated += usize; + ta->deallocated += old_size; + } + UTRACE(p, size, q); + JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero); + return (ALLOCM_SUCCESS); +label_err: + if (no_move) { + UTRACE(p, size, q); + return (ALLOCM_ERR_NOT_MOVED); + } +label_oom: + if (config_xmalloc && opt_xmalloc) { + malloc_write(": Error in rallocm(): " + "out of memory\n"); + abort(); + } + UTRACE(p, size, 0); + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +je_sallocm(const void *ptr, size_t *rsize, int flags) +{ + size_t sz; + + assert(malloc_initialized || IS_INITIALIZER); + + if (config_ivsalloc) + sz = ivsalloc(ptr, config_prof); + else { + assert(ptr != NULL); + sz = isalloc(ptr, config_prof); + } + assert(rsize != NULL); + *rsize = sz; + + return (ALLOCM_SUCCESS); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +je_dallocm(void *ptr, int flags) +{ + size_t usize; + size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + + assert(ptr != NULL); + assert(malloc_initialized || IS_INITIALIZER); + + UTRACE(ptr, 0, 0); + if (config_stats || config_valgrind) + usize = isalloc(ptr, config_prof); + if (config_prof && opt_prof) { + if (config_stats == false && config_valgrind == false) + usize = isalloc(ptr, config_prof); + prof_free(ptr, usize); + } + if (config_stats) + thread_allocated_tsd_get()->deallocated += usize; + if (config_valgrind && opt_valgrind) + rzsize = p2rz(ptr); + iqalloc(ptr); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + + return (ALLOCM_SUCCESS); +} + +JEMALLOC_ATTR(visibility("default")) +int +je_nallocm(size_t *rsize, size_t size, int flags) +{ + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + + assert(size != 0); + + if (malloc_init()) + return (ALLOCM_ERR_OOM); + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + if (usize == 0) + return (ALLOCM_ERR_OOM); + + if (rsize != NULL) + *rsize = usize; + return (ALLOCM_SUCCESS); +} + +#endif +/* + * End experimental functions. + */ +/******************************************************************************/ +/* + * The following functions are used by threading libraries for protection of + * malloc during fork(). + */ + +#ifndef JEMALLOC_MUTEX_INIT_CB +void +jemalloc_prefork(void) +#else +void +_malloc_prefork(void) +#endif +{ + unsigned i; + + /* Acquire all mutexes in a safe order. 
*/ + malloc_mutex_prefork(&arenas_lock); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + arena_prefork(arenas[i]); + } + base_prefork(); + huge_prefork(); + chunk_dss_prefork(); +} + +#ifndef JEMALLOC_MUTEX_INIT_CB +void +jemalloc_postfork_parent(void) +#else +void +_malloc_postfork(void) +#endif +{ + unsigned i; + + /* Release all mutexes, now that fork() has completed. */ + chunk_dss_postfork_parent(); + huge_postfork_parent(); + base_postfork_parent(); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + arena_postfork_parent(arenas[i]); + } + malloc_mutex_postfork_parent(&arenas_lock); +} + +void +jemalloc_postfork_child(void) +{ + unsigned i; + + /* Release all mutexes, now that fork() has completed. */ + chunk_dss_postfork_child(); + huge_postfork_child(); + base_postfork_child(); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + arena_postfork_child(arenas[i]); + } + malloc_mutex_postfork_child(&arenas_lock); +} + +/******************************************************************************/ +/* + * The following functions are used for TLS allocation/deallocation in static + * binaries on FreeBSD. The primary difference between these and i[mcd]alloc() + * is that these avoid accessing TLS variables. + */ + +static void * +a0alloc(size_t size, bool zero) +{ + + if (malloc_init()) + return (NULL); + + if (size == 0) + size = 1; + + if (size <= arena_maxclass) + return (arena_malloc(arenas[0], size, zero, false)); + else + return (huge_malloc(size, zero)); +} + +void * +a0malloc(size_t size) +{ + + return (a0alloc(size, false)); +} + +void * +a0calloc(size_t num, size_t size) +{ + + return (a0alloc(num * size, true)); +} + +void +a0free(void *ptr) +{ + arena_chunk_t *chunk; + + if (ptr == NULL) + return; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr, false); + else + huge_dalloc(ptr, true); +} + +/******************************************************************************/ diff --git a/contrib/jemalloc/src/mb.c b/contrib/jemalloc/src/mb.c new file mode 100644 index 000000000000..dc2c0a256fde --- /dev/null +++ b/contrib/jemalloc/src/mb.c @@ -0,0 +1,2 @@ +#define JEMALLOC_MB_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/contrib/jemalloc/src/mutex.c b/contrib/jemalloc/src/mutex.c new file mode 100644 index 000000000000..7be5fc9ef10e --- /dev/null +++ b/contrib/jemalloc/src/mutex.c @@ -0,0 +1,153 @@ +#define JEMALLOC_MUTEX_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +#ifdef JEMALLOC_LAZY_LOCK +#include +#endif + +/******************************************************************************/ +/* Data. */ + +#ifdef JEMALLOC_LAZY_LOCK +bool isthreaded = false; +#endif +#ifdef JEMALLOC_MUTEX_INIT_CB +static bool postpone_init = true; +static malloc_mutex_t *postponed_mutexes = NULL; +#endif + +#ifdef JEMALLOC_LAZY_LOCK +static void pthread_create_once(void); +#endif + +/******************************************************************************/ +/* + * We intercept pthread_create() calls in order to toggle isthreaded if the + * process goes multi-threaded. 
+ */
+
+#ifdef JEMALLOC_LAZY_LOCK
+static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
+    void *(*)(void *), void *__restrict);
+
+static void
+pthread_create_once(void)
+{
+
+	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
+	if (pthread_create_fptr == NULL) {
+		malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
+		    "\"pthread_create\")\n");
+		abort();
+	}
+
+	isthreaded = true;
+}
+
+JEMALLOC_ATTR(visibility("default"))
+int
+pthread_create(pthread_t *__restrict thread,
+    const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
+    void *__restrict arg)
+{
+	static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+
+	pthread_once(&once_control, pthread_create_once);
+
+	return (pthread_create_fptr(thread, attr, start_routine, arg));
+}
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_MUTEX_INIT_CB
+int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
+    void *(calloc_cb)(size_t, size_t));
+
+__weak_reference(_pthread_mutex_init_calloc_cb_stub,
+    _pthread_mutex_init_calloc_cb);
+
+int
+_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
+    void *(calloc_cb)(size_t, size_t))
+{
+
+	return (0);
+}
+#endif
+
+bool
+malloc_mutex_init(malloc_mutex_t *mutex)
+{
+#ifdef JEMALLOC_OSSPIN
+	mutex->lock = 0;
+#elif (defined(JEMALLOC_MUTEX_INIT_CB))
+	if (postpone_init) {
+		mutex->postponed_next = postponed_mutexes;
+		postponed_mutexes = mutex;
+	} else {
+		if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
+		    0)
+			return (true);
+	}
+#else
+	pthread_mutexattr_t attr;
+
+	if (pthread_mutexattr_init(&attr) != 0)
+		return (true);
+	pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE);
+	if (pthread_mutex_init(&mutex->lock, &attr) != 0) {
+		pthread_mutexattr_destroy(&attr);
+		return (true);
+	}
+	pthread_mutexattr_destroy(&attr);
+
+#endif
+	return (false);
+}
+
+void
+malloc_mutex_prefork(malloc_mutex_t *mutex)
+{
+
+	malloc_mutex_lock(mutex);
+}
+
+void
+malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
+{
+
+	malloc_mutex_unlock(mutex);
+}
+
+void
+malloc_mutex_postfork_child(malloc_mutex_t *mutex)
+{
+
+#ifdef JEMALLOC_MUTEX_INIT_CB
+	malloc_mutex_unlock(mutex);
+#else
+	if (malloc_mutex_init(mutex)) {
+		malloc_printf("<jemalloc>: Error re-initializing mutex in "
+		    "child\n");
+		if (opt_abort)
+			abort();
+	}
+#endif
+}
+
+bool
+mutex_boot(void)
+{
+
+#ifdef JEMALLOC_MUTEX_INIT_CB
+	postpone_init = false;
+	while (postponed_mutexes != NULL) {
+		if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
+		    base_calloc) != 0)
+			return (true);
+		postponed_mutexes = postponed_mutexes->postponed_next;
+	}
+#endif
+	return (false);
+}
diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c
new file mode 100644
index 000000000000..b509aaef0765
--- /dev/null
+++ b/contrib/jemalloc/src/prof.c
@@ -0,0 +1,1243 @@
+#define JEMALLOC_PROF_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+/******************************************************************************/
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
+/******************************************************************************/
+/* Data.
*/ + +malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL) + +bool opt_prof = false; +bool opt_prof_active = true; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_leak = false; +bool opt_prof_accum = true; +char opt_prof_prefix[PATH_MAX + 1]; + +uint64_t prof_interval; +bool prof_promote; + +/* + * Table of mutexes that are shared among ctx's. These are leaf locks, so + * there is no problem with using them for more than one ctx at the same time. + * The primary motivation for this sharing though is that ctx's are ephemeral, + * and destroying mutexes causes complications for systems that allocate when + * creating/destroying mutexes. + */ +static malloc_mutex_t *ctx_locks; +static unsigned cum_ctxs; /* Atomic counter. */ + +/* + * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2ctx; +static malloc_mutex_t bt2ctx_mtx; + +static malloc_mutex_t prof_dump_seq_mtx; +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; + +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since + * it must be locked anyway during dumping. + */ +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; +static unsigned prof_dump_buf_end; +static int prof_dump_fd; + +/* Do not dump any profiles until bootstrapping is complete. */ +static bool prof_booted = false; + +static malloc_mutex_t enq_mtx; +static bool enq; +static bool enq_idump; +static bool enq_gdump; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static prof_bt_t *bt_dup(prof_bt_t *bt); +static void bt_destroy(prof_bt_t *bt); +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code prof_unwind_init_callback( + struct _Unwind_Context *context, void *arg); +static _Unwind_Reason_Code prof_unwind_callback( + struct _Unwind_Context *context, void *arg); +#endif +static bool prof_flush(bool propagate_err); +static bool prof_write(bool propagate_err, const char *s); +static bool prof_printf(bool propagate_err, const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 2, 3)); +static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, + size_t *leak_nctx); +static void prof_ctx_destroy(prof_ctx_t *ctx); +static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); +static bool prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, + prof_bt_t *bt); +static bool prof_dump_maps(bool propagate_err); +static bool prof_dump(bool propagate_err, const char *filename, + bool leakcheck); +static void prof_dump_filename(char *filename, char v, int64_t vseq); +static void prof_fdump(void); +static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2); +static bool prof_bt_keycomp(const void *k1, const void *k2); +static malloc_mutex_t *prof_ctx_mutex_choose(void); + +/******************************************************************************/ + +void +bt_init(prof_bt_t *bt, void **vec) +{ + + cassert(config_prof); + + bt->vec = vec; + bt->len = 0; +} + +static void +bt_destroy(prof_bt_t *bt) +{ + + cassert(config_prof); + + idalloc(bt); +} + +static prof_bt_t * +bt_dup(prof_bt_t *bt) +{ + prof_bt_t *ret; + + cassert(config_prof); + + /* + * Create a single allocation that has space for vec immediately + * following the prof_bt_t structure. The backtraces that get + * stored in the backtrace caches are copied from stack-allocated + * temporary variables, so size is known at creation time. Making this + * a contiguous object improves cache locality. + */ + ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + + (bt->len * sizeof(void *))); + if (ret == NULL) + return (NULL); + ret->vec = (void **)((uintptr_t)ret + + QUANTUM_CEILING(sizeof(prof_bt_t))); + memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); + ret->len = bt->len; + + return (ret); +} + +static inline void +prof_enter(void) +{ + + cassert(config_prof); + + malloc_mutex_lock(&enq_mtx); + enq = true; + malloc_mutex_unlock(&enq_mtx); + + malloc_mutex_lock(&bt2ctx_mtx); +} + +static inline void +prof_leave(void) +{ + bool idump, gdump; + + cassert(config_prof); + + malloc_mutex_unlock(&bt2ctx_mtx); + + malloc_mutex_lock(&enq_mtx); + enq = false; + idump = enq_idump; + enq_idump = false; + gdump = enq_gdump; + enq_gdump = false; + malloc_mutex_unlock(&enq_mtx); + + if (idump) + prof_idump(); + if (gdump) + prof_gdump(); +} + +#ifdef JEMALLOC_PROF_LIBUNWIND +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ + unw_context_t uc; + unw_cursor_t cursor; + unsigned i; + int err; + + cassert(config_prof); + assert(bt->len == 0); + assert(bt->vec != NULL); + + unw_getcontext(&uc); + unw_init_local(&cursor, &uc); + + /* Throw away (nignore+1) stack frames, if that many exist. */ + for (i = 0; i < nignore + 1; i++) { + err = unw_step(&cursor); + if (err <= 0) + return; + } + + /* + * Iterate over stack frames until there are no more, or until no space + * remains in bt. 
+ */ + for (i = 0; i < PROF_BT_MAX; i++) { + unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); + bt->len++; + err = unw_step(&cursor); + if (err <= 0) + break; + } +} +#elif (defined(JEMALLOC_PROF_LIBGCC)) +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + cassert(config_prof); + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + cassert(config_prof); + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ + prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX}; + + cassert(config_prof); + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif (defined(JEMALLOC_PROF_GCC)) +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ +#define BT_FRAME(i) \ + if ((i) < nignore + PROF_BT_MAX) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) \ + return; \ + p = __builtin_return_address(i); \ + if (p == NULL) \ + return; \ + if (i >= nignore) { \ + bt->vec[(i) - nignore] = p; \ + bt->len = (i) - nignore + 1; \ + } \ + } else \ + return; + + cassert(config_prof); + assert(nignore <= 3); + + BT_FRAME(0) + BT_FRAME(1) + BT_FRAME(2) + BT_FRAME(3) + BT_FRAME(4) + BT_FRAME(5) + BT_FRAME(6) + BT_FRAME(7) + BT_FRAME(8) + BT_FRAME(9) + + BT_FRAME(10) + BT_FRAME(11) + BT_FRAME(12) + BT_FRAME(13) + BT_FRAME(14) + BT_FRAME(15) + BT_FRAME(16) + BT_FRAME(17) + BT_FRAME(18) + BT_FRAME(19) + + BT_FRAME(20) + BT_FRAME(21) + BT_FRAME(22) + BT_FRAME(23) + BT_FRAME(24) + BT_FRAME(25) + BT_FRAME(26) + BT_FRAME(27) + BT_FRAME(28) + BT_FRAME(29) + + BT_FRAME(30) + BT_FRAME(31) + BT_FRAME(32) + BT_FRAME(33) + BT_FRAME(34) + BT_FRAME(35) + BT_FRAME(36) + BT_FRAME(37) + BT_FRAME(38) + BT_FRAME(39) + + BT_FRAME(40) + BT_FRAME(41) + BT_FRAME(42) + BT_FRAME(43) + BT_FRAME(44) + BT_FRAME(45) + BT_FRAME(46) + BT_FRAME(47) + BT_FRAME(48) + BT_FRAME(49) + + BT_FRAME(50) + BT_FRAME(51) + BT_FRAME(52) + BT_FRAME(53) + BT_FRAME(54) + BT_FRAME(55) + BT_FRAME(56) + BT_FRAME(57) + BT_FRAME(58) + BT_FRAME(59) + + BT_FRAME(60) + BT_FRAME(61) + BT_FRAME(62) + BT_FRAME(63) + BT_FRAME(64) + BT_FRAME(65) + BT_FRAME(66) + BT_FRAME(67) + BT_FRAME(68) + BT_FRAME(69) + + BT_FRAME(70) + BT_FRAME(71) + BT_FRAME(72) + BT_FRAME(73) + BT_FRAME(74) + BT_FRAME(75) + BT_FRAME(76) + BT_FRAME(77) + BT_FRAME(78) + BT_FRAME(79) + + BT_FRAME(80) + BT_FRAME(81) + BT_FRAME(82) + BT_FRAME(83) + BT_FRAME(84) + BT_FRAME(85) + BT_FRAME(86) + BT_FRAME(87) + BT_FRAME(88) + BT_FRAME(89) + + BT_FRAME(90) + BT_FRAME(91) + BT_FRAME(92) + BT_FRAME(93) + BT_FRAME(94) + BT_FRAME(95) + BT_FRAME(96) + BT_FRAME(97) + BT_FRAME(98) + BT_FRAME(99) + + BT_FRAME(100) + BT_FRAME(101) + BT_FRAME(102) + BT_FRAME(103) + BT_FRAME(104) + BT_FRAME(105) + BT_FRAME(106) + BT_FRAME(107) + BT_FRAME(108) + BT_FRAME(109) + + BT_FRAME(110) + BT_FRAME(111) + BT_FRAME(112) + BT_FRAME(113) + BT_FRAME(114) + BT_FRAME(115) + BT_FRAME(116) + BT_FRAME(117) + BT_FRAME(118) + BT_FRAME(119) + + BT_FRAME(120) + BT_FRAME(121) + BT_FRAME(122) + BT_FRAME(123) + BT_FRAME(124) + BT_FRAME(125) + BT_FRAME(126) + BT_FRAME(127) + + /* Extras to compensate for nignore. 
*/ + BT_FRAME(128) + BT_FRAME(129) + BT_FRAME(130) +#undef BT_FRAME +} +#else +void +prof_backtrace(prof_bt_t *bt, unsigned nignore) +{ + + cassert(config_prof); + assert(false); +} +#endif + +prof_thr_cnt_t * +prof_lookup(prof_bt_t *bt) +{ + union { + prof_thr_cnt_t *p; + void *v; + } ret; + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + prof_tdata = *prof_tdata_tsd_get(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + union { + prof_bt_t *p; + void *v; + } btkey; + union { + prof_ctx_t *p; + void *v; + } ctx; + bool new_ctx; + + /* + * This thread's cache lacks bt. Look for it in the global + * cache. + */ + prof_enter(); + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + /* bt has never been seen before. Insert it. */ + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { + prof_leave(); + return (NULL); + } + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { + prof_leave(); + idalloc(ctx.v); + return (NULL); + } + ctx.p->bt = btkey.p; + ctx.p->lock = prof_ctx_mutex_choose(); + memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx.p->cnts_ql); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + /* OOM. */ + prof_leave(); + idalloc(btkey.v); + idalloc(ctx.v); + return (NULL); + } + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + * + * No locking is necessary for ctx here because no other + * threads have had the opportunity to fetch it from + * bt2ctx yet. + */ + ctx.p->cnt_merged.curobjs++; + new_ctx = true; + } else { + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + */ + malloc_mutex_lock(ctx.p->lock); + ctx.p->cnt_merged.curobjs++; + malloc_mutex_unlock(ctx.p->lock); + new_ctx = false; + } + prof_leave(); + + /* Link a prof_thd_cnt_t into ctx for this thread. */ + if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) { + assert(ckh_count(&prof_tdata->bt2cnt) > 0); + /* + * Flush the least recently used cnt in order to keep + * bt2cnt from becoming too large. + */ + ret.p = ql_last(&prof_tdata->lru_ql, lru_link); + assert(ret.v != NULL); + if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, + NULL, NULL)) + assert(false); + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + prof_ctx_merge(ret.p->ctx, ret.p); + /* ret can now be re-used. */ + } else { + assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX); + /* Allocate and partially initialize a new cnt. */ + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) { + if (new_ctx) + prof_ctx_destroy(ctx.p); + return (NULL); + } + ql_elm_new(ret.p, cnts_link); + ql_elm_new(ret.p, lru_link); + } + /* Finish initializing ret. */ + ret.p->ctx = ctx.p; + ret.p->epoch = 0; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { + if (new_ctx) + prof_ctx_destroy(ctx.p); + idalloc(ret.v); + return (NULL); + } + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(ctx.p->lock); + ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); + ctx.p->cnt_merged.curobjs--; + malloc_mutex_unlock(ctx.p->lock); + } else { + /* Move ret to the front of the LRU. 
*/ + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + } + + return (ret.p); +} + +static bool +prof_flush(bool propagate_err) +{ + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (propagate_err == false) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) + abort(); + } + ret = true; + } + prof_dump_buf_end = 0; + + return (ret); +} + +static bool +prof_write(bool propagate_err, const char *s) +{ + unsigned i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) + if (prof_flush(propagate_err) && propagate_err) + return (true); + + if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + + return (false); +} + +JEMALLOC_ATTR(format(printf, 2, 3)) +static bool +prof_printf(bool propagate_err, const char *format, ...) +{ + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_write(propagate_err, buf); + + return (ret); +} + +static void +prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) +{ + prof_thr_cnt_t *thr_cnt; + prof_cnt_t tcnt; + + cassert(config_prof); + + malloc_mutex_lock(ctx->lock); + + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); + ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { + volatile unsigned *epoch = &thr_cnt->epoch; + + while (true) { + unsigned epoch0 = *epoch; + + /* Make sure epoch is even. */ + if (epoch0 & 1U) + continue; + + memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); + + /* Terminate if epoch didn't change while reading. */ + if (*epoch == epoch0) + break; + } + + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + } + + if (ctx->cnt_summed.curobjs != 0) + (*leak_nctx)++; + + /* Add to cnt_all. */ + cnt_all->curobjs += ctx->cnt_summed.curobjs; + cnt_all->curbytes += ctx->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += ctx->cnt_summed.accumobjs; + cnt_all->accumbytes += ctx->cnt_summed.accumbytes; + } + + malloc_mutex_unlock(ctx->lock); +} + +static void +prof_ctx_destroy(prof_ctx_t *ctx) +{ + + cassert(config_prof); + + /* + * Check that ctx is still unused by any thread cache before destroying + * it. prof_lookup() artificially raises ctx->cnt_merge.curobjs in + * order to avoid a race condition with this function, as does + * prof_ctx_merge() in order to avoid a race between the main body of + * prof_ctx_merge() and entry into this function. + */ + prof_enter(); + malloc_mutex_lock(ctx->lock); + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + assert(ctx->cnt_merged.curbytes == 0); + assert(ctx->cnt_merged.accumobjs == 0); + assert(ctx->cnt_merged.accumbytes == 0); + /* Remove ctx from bt2ctx. */ + if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL)) + assert(false); + prof_leave(); + /* Destroy ctx. 
*/ + malloc_mutex_unlock(ctx->lock); + bt_destroy(ctx->bt); + idalloc(ctx); + } else { + /* + * Compensate for increment in prof_ctx_merge() or + * prof_lookup(). + */ + ctx->cnt_merged.curobjs--; + malloc_mutex_unlock(ctx->lock); + prof_leave(); + } +} + +static void +prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +{ + bool destroy; + + cassert(config_prof); + + /* Merge cnt stats and detach from ctx. */ + malloc_mutex_lock(ctx->lock); + ctx->cnt_merged.curobjs += cnt->cnts.curobjs; + ctx->cnt_merged.curbytes += cnt->cnts.curbytes; + ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; + ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; + ql_remove(&ctx->cnts_ql, cnt, cnts_link); + if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && + ctx->cnt_merged.curobjs == 0) { + /* + * Artificially raise ctx->cnt_merged.curobjs in order to keep + * another thread from winning the race to destroy ctx while + * this one has ctx->lock dropped. Without this, it would be + * possible for another thread to: + * + * 1) Sample an allocation associated with ctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_ctx_destroy(ctx). + * + * The result would be that ctx no longer exists by the time + * this thread accesses it in prof_ctx_destroy(). + */ + ctx->cnt_merged.curobjs++; + destroy = true; + } else + destroy = false; + malloc_mutex_unlock(ctx->lock); + if (destroy) + prof_ctx_destroy(ctx); +} + +static bool +prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt) +{ + unsigned i; + + cassert(config_prof); + + if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + assert(ctx->cnt_summed.curbytes == 0); + assert(ctx->cnt_summed.accumobjs == 0); + assert(ctx->cnt_summed.accumbytes == 0); + return (false); + } + + if (prof_printf(propagate_err, "%"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @", + ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, + ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) + return (true); + + for (i = 0; i < bt->len; i++) { + if (prof_printf(propagate_err, " %#"PRIxPTR, + (uintptr_t)bt->vec[i])) + return (true); + } + + if (prof_write(propagate_err, "\n")) + return (true); + + return (false); +} + +static bool +prof_dump_maps(bool propagate_err) +{ + int mfd; + char filename[PATH_MAX + 1]; + + cassert(config_prof); + + malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", + (int)getpid()); + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + ssize_t nread; + + if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) + return (true); + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + if (prof_flush(propagate_err) && propagate_err) + return (true); + } + nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUFSIZE - prof_dump_buf_end); + } while (nread > 0); + close(mfd); + } else + return (true); + + return (false); +} + +static bool +prof_dump(bool propagate_err, const char *filename, bool leakcheck) +{ + prof_cnt_t cnt_all; + size_t tabind; + union { + prof_bt_t *p; + void *v; + } bt; + union { + prof_ctx_t *p; + void *v; + } ctx; + size_t leak_nctx; + + cassert(config_prof); + + prof_enter(); + prof_dump_fd = creat(filename, 0644); + if (prof_dump_fd == -1) { + if (propagate_err == false) { + malloc_printf( + ": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) + abort(); + } + goto label_error; + } + + /* Merge per thread profile stats, and sum them in cnt_all. 
*/ + memset(&cnt_all, 0, sizeof(prof_cnt_t)); + leak_nctx = 0; + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) + prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); + + /* Dump profile header. */ + if (opt_lg_prof_sample == 0) { + if (prof_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heapprofile\n", + cnt_all.curobjs, cnt_all.curbytes, + cnt_all.accumobjs, cnt_all.accumbytes)) + goto label_error; + } else { + if (prof_printf(propagate_err, + "heap profile: %"PRId64": %"PRId64 + " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", + cnt_all.curobjs, cnt_all.curbytes, + cnt_all.accumobjs, cnt_all.accumbytes, + ((uint64_t)1U << opt_lg_prof_sample))) + goto label_error; + } + + /* Dump per ctx profile stats. */ + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) + == false;) { + if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) + goto label_error; + } + + /* Dump /proc//maps if possible. */ + if (prof_dump_maps(propagate_err)) + goto label_error; + + if (prof_flush(propagate_err)) + goto label_error; + close(prof_dump_fd); + prof_leave(); + + if (leakcheck && cnt_all.curbytes != 0) { + malloc_printf(": Leak summary: %"PRId64" byte%s, %" + PRId64" object%s, %zu context%s\n", + cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "", + cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "", + leak_nctx, (leak_nctx != 1) ? "s" : ""); + malloc_printf( + ": Run pprof on \"%s\" for leak detail\n", + filename); + } + + return (false); +label_error: + prof_leave(); + return (true); +} + +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +static void +prof_dump_filename(char *filename, char v, int64_t vseq) +{ + + cassert(config_prof); + + if (vseq != UINT64_C(0xffffffffffffffff)) { + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c%"PRId64".heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"PRIu64".%c.heap", + opt_prof_prefix, (int)getpid(), prof_dump_seq, v); + } +} + +static void +prof_fdump(void) +{ + char filename[DUMP_FILENAME_BUFSIZE]; + + cassert(config_prof); + + if (prof_booted == false) + return; + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff)); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(false, filename, opt_prof_leak); + } +} + +void +prof_idump(void) +{ + char filename[PATH_MAX + 1]; + + cassert(config_prof); + + if (prof_booted == false) + return; + malloc_mutex_lock(&enq_mtx); + if (enq) { + enq_idump = true; + malloc_mutex_unlock(&enq_mtx); + return; + } + malloc_mutex_unlock(&enq_mtx); + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(false, filename, false); + } +} + +bool +prof_mdump(const char *filename) +{ + char filename_buf[DUMP_FILENAME_BUFSIZE]; + + cassert(config_prof); + + if (opt_prof == false || prof_booted == false) + return (true); + + if (filename == NULL) { + /* No filename specified, so automatically generate one. 
*/ + if (opt_prof_prefix[0] == '\0') + return (true); + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename_buf, 'm', prof_dump_mseq); + prof_dump_mseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + filename = filename_buf; + } + return (prof_dump(true, filename, false)); +} + +void +prof_gdump(void) +{ + char filename[DUMP_FILENAME_BUFSIZE]; + + cassert(config_prof); + + if (prof_booted == false) + return; + malloc_mutex_lock(&enq_mtx); + if (enq) { + enq_gdump = true; + malloc_mutex_unlock(&enq_mtx); + return; + } + malloc_mutex_unlock(&enq_mtx); + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(false, filename, false); + } +} + +static void +prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) +{ + size_t ret1, ret2; + uint64_t h; + prof_bt_t *bt = (prof_bt_t *)key; + + cassert(config_prof); + assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); + assert(hash1 != NULL); + assert(hash2 != NULL); + + h = hash(bt->vec, bt->len * sizeof(void *), + UINT64_C(0x94122f335b332aea)); + if (minbits <= 32) { + /* + * Avoid doing multiple hashes, since a single hash provides + * enough bits. + */ + ret1 = h & ZU(0xffffffffU); + ret2 = h >> 32; + } else { + ret1 = h; + ret2 = hash(bt->vec, bt->len * sizeof(void *), + UINT64_C(0x8432a476666bbc13)); + } + + *hash1 = ret1; + *hash2 = ret2; +} + +static bool +prof_bt_keycomp(const void *k1, const void *k2) +{ + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + cassert(config_prof); + + if (bt1->len != bt2->len) + return (false); + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + +static malloc_mutex_t * +prof_ctx_mutex_choose(void) +{ + unsigned nctxs = atomic_add_u(&cum_ctxs, 1); + + return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]); +} + +prof_tdata_t * +prof_tdata_init(void) +{ + prof_tdata_t *prof_tdata; + + cassert(config_prof); + + /* Initialize an empty cache for this thread. */ + prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + if (prof_tdata == NULL) + return (NULL); + + if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp)) { + idalloc(prof_tdata); + return (NULL); + } + ql_new(&prof_tdata->lru_ql); + + prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX); + if (prof_tdata->vec == NULL) { + ckh_delete(&prof_tdata->bt2cnt); + idalloc(prof_tdata); + return (NULL); + } + + prof_tdata->prng_state = 0; + prof_tdata->threshold = 0; + prof_tdata->accum = 0; + + prof_tdata_tsd_set(&prof_tdata); + + return (prof_tdata); +} + +void +prof_tdata_cleanup(void *arg) +{ + prof_thr_cnt_t *cnt; + prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg; + + cassert(config_prof); + + /* + * Delete the hash table. All of its contents can still be iterated + * over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + + /* Iteratively merge cnt's into the global stats and delete them. 
*/ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + prof_ctx_merge(cnt->ctx, cnt); + idalloc(cnt); + } + + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + prof_tdata = NULL; + prof_tdata_tsd_set(&prof_tdata); +} + +void +prof_boot0(void) +{ + + cassert(config_prof); + + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, + sizeof(PROF_PREFIX_DEFAULT)); +} + +void +prof_boot1(void) +{ + + cassert(config_prof); + + /* + * opt_prof and prof_promote must be in their final state before any + * arenas are initialized, so this function must be executed early. + */ + + if (opt_prof_leak && opt_prof == false) { + /* + * Enable opt_prof, but in such a way that profiles are never + * automatically dumped. + */ + opt_prof = true; + opt_prof_gdump = false; + prof_interval = 0; + } else if (opt_prof) { + if (opt_lg_prof_interval >= 0) { + prof_interval = (((uint64_t)1U) << + opt_lg_prof_interval); + } else + prof_interval = 0; + } + + prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE); +} + +bool +prof_boot2(void) +{ + + cassert(config_prof); + + if (opt_prof) { + unsigned i; + + if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) + return (true); + if (malloc_mutex_init(&bt2ctx_mtx)) + return (true); + if (prof_tdata_tsd_boot()) { + malloc_write( + ": Error in pthread_key_create()\n"); + abort(); + } + + if (malloc_mutex_init(&prof_dump_seq_mtx)) + return (true); + + if (malloc_mutex_init(&enq_mtx)) + return (true); + enq = false; + enq_idump = false; + enq_gdump = false; + + if (atexit(prof_fdump) != 0) { + malloc_write(": Error in atexit()\n"); + if (opt_abort) + abort(); + } + + ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + sizeof(malloc_mutex_t)); + if (ctx_locks == NULL) + return (true); + for (i = 0; i < PROF_NCTX_LOCKS; i++) { + if (malloc_mutex_init(&ctx_locks[i])) + return (true); + } + } + +#ifdef JEMALLOC_PROF_LIBGCC + /* + * Cause the backtracing machinery to allocate its internal state + * before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); +#endif + + prof_booted = true; + + return (false); +} + +/******************************************************************************/ diff --git a/contrib/jemalloc/src/quarantine.c b/contrib/jemalloc/src/quarantine.c new file mode 100644 index 000000000000..89a25c6a3169 --- /dev/null +++ b/contrib/jemalloc/src/quarantine.c @@ -0,0 +1,163 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +typedef struct quarantine_s quarantine_t; + +struct quarantine_s { + size_t curbytes; + size_t curobjs; + size_t first; +#define LG_MAXOBJS_INIT 10 + size_t lg_maxobjs; + void *objs[1]; /* Dynamically sized ring buffer. */ +}; + +static void quarantine_cleanup(void *arg); + +malloc_tsd_data(static, quarantine, quarantine_t *, NULL) +malloc_tsd_funcs(JEMALLOC_INLINE, quarantine, quarantine_t *, NULL, + quarantine_cleanup) + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static quarantine_t *quarantine_init(size_t lg_maxobjs); +static quarantine_t *quarantine_grow(quarantine_t *quarantine); +static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound); + +/******************************************************************************/ + +static quarantine_t * +quarantine_init(size_t lg_maxobjs) +{ + quarantine_t *quarantine; + + quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) + + ((ZU(1) << lg_maxobjs) * sizeof(void *))); + if (quarantine == NULL) + return (NULL); + quarantine->curbytes = 0; + quarantine->curobjs = 0; + quarantine->first = 0; + quarantine->lg_maxobjs = lg_maxobjs; + + quarantine_tsd_set(&quarantine); + + return (quarantine); +} + +static quarantine_t * +quarantine_grow(quarantine_t *quarantine) +{ + quarantine_t *ret; + + ret = quarantine_init(quarantine->lg_maxobjs + 1); + if (ret == NULL) + return (quarantine); + + ret->curbytes = quarantine->curbytes; + if (quarantine->first + quarantine->curobjs < (ZU(1) << + quarantine->lg_maxobjs)) { + /* objs ring buffer data are contiguous. */ + memcpy(ret->objs, &quarantine->objs[quarantine->first], + quarantine->curobjs * sizeof(void *)); + ret->curobjs = quarantine->curobjs; + } else { + /* objs ring buffer data wrap around. */ + size_t ncopy = (ZU(1) << quarantine->lg_maxobjs) - + quarantine->first; + memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy * + sizeof(void *)); + ret->curobjs = ncopy; + if (quarantine->curobjs != 0) { + memcpy(&ret->objs[ret->curobjs], quarantine->objs, + quarantine->curobjs - ncopy); + } + } + + return (ret); +} + +static void +quarantine_drain(quarantine_t *quarantine, size_t upper_bound) +{ + + while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) { + void *ptr = quarantine->objs[quarantine->first]; + size_t usize = isalloc(ptr, config_prof); + idalloc(ptr); + quarantine->curbytes -= usize; + quarantine->curobjs--; + quarantine->first = (quarantine->first + 1) & ((ZU(1) << + quarantine->lg_maxobjs) - 1); + } +} + +void +quarantine(void *ptr) +{ + quarantine_t *quarantine; + size_t usize = isalloc(ptr, config_prof); + + assert(config_fill); + assert(opt_quarantine); + + quarantine = *quarantine_tsd_get(); + if (quarantine == NULL && (quarantine = + quarantine_init(LG_MAXOBJS_INIT)) == NULL) { + idalloc(ptr); + return; + } + /* + * Drain one or more objects if the quarantine size limit would be + * exceeded by appending ptr. + */ + if (quarantine->curbytes + usize > opt_quarantine) { + size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine + - usize : 0; + quarantine_drain(quarantine, upper_bound); + } + /* Grow the quarantine ring buffer if it's full. */ + if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) + quarantine = quarantine_grow(quarantine); + /* quarantine_grow() must free a slot if it fails to grow. */ + assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); + /* Append ptr if its size doesn't exceed the quarantine size. 
*/ + if (quarantine->curbytes + usize <= opt_quarantine) { + size_t offset = (quarantine->first + quarantine->curobjs) & + ((ZU(1) << quarantine->lg_maxobjs) - 1); + quarantine->objs[offset] = ptr; + quarantine->curbytes += usize; + quarantine->curobjs++; + if (opt_junk) + memset(ptr, 0x5a, usize); + } else { + assert(quarantine->curbytes == 0); + idalloc(ptr); + } +} + +static void +quarantine_cleanup(void *arg) +{ + quarantine_t *quarantine = *(quarantine_t **)arg; + + if (quarantine != NULL) { + quarantine_drain(quarantine, 0); + idalloc(quarantine); + } +} + +bool +quarantine_boot(void) +{ + + assert(config_fill); + + if (quarantine_tsd_boot()) + return (true); + + return (false); +} diff --git a/contrib/jemalloc/src/rtree.c b/contrib/jemalloc/src/rtree.c new file mode 100644 index 000000000000..eb0ff1e24afc --- /dev/null +++ b/contrib/jemalloc/src/rtree.c @@ -0,0 +1,46 @@ +#define JEMALLOC_RTREE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +rtree_t * +rtree_new(unsigned bits) +{ + rtree_t *ret; + unsigned bits_per_level, height, i; + + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + height = bits / bits_per_level; + if (height * bits_per_level != bits) + height++; + assert(height * bits_per_level >= bits); + + ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + + (sizeof(unsigned) * height)); + if (ret == NULL) + return (NULL); + memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * + height)); + + if (malloc_mutex_init(&ret->mutex)) { + /* Leak the rtree. */ + return (NULL); + } + ret->height = height; + if (bits_per_level * height > bits) + ret->level2bits[0] = bits % bits_per_level; + else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height; i++) + ret->level2bits[i] = bits_per_level; + + ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + if (ret->root == NULL) { + /* + * We leak the rtree here, since there's no generic base + * deallocation. + */ + return (NULL); + } + memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); + + return (ret); +} diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c new file mode 100644 index 000000000000..4cad214fc556 --- /dev/null +++ b/contrib/jemalloc/src/stats.c @@ -0,0 +1,550 @@ +#define JEMALLOC_STATS_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +#define CTL_GET(n, v, t) do { \ + size_t sz = sizeof(t); \ + xmallctl(n, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_I_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = i; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_J_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = j; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_IJ_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = i; \ + mib[4] = j; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +/******************************************************************************/ +/* Data. */ + +bool opt_stats_print = false; + +size_t stats_cactive = 0; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static void stats_arena_bins_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i, bool bins, bool large); + +/******************************************************************************/ + +static void +stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + size_t page; + bool config_tcache; + unsigned nbins, j, gap_start; + + CTL_GET("arenas.page", &page, size_t); + + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: bin size regs pgs allocated nmalloc" + " ndalloc nrequests nfills nflushes" + " newruns reruns curruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: bin size regs pgs allocated nmalloc" + " ndalloc newruns reruns curruns\n"); + } + CTL_GET("arenas.nbins", &nbins, unsigned); + for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { + uint64_t nruns; + + CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); + if (nruns == 0) { + if (gap_start == UINT_MAX) + gap_start = j; + } else { + size_t reg_size, run_size, allocated; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t reruns; + size_t curruns; + + if (gap_start != UINT_MAX) { + if (j > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_cprintf(write_cb, cbopaque, + "[%u..%u]\n", gap_start, + j - 1); + } else { + /* Gap of one size class. */ + malloc_cprintf(write_cb, cbopaque, + "[%u]\n", gap_start); + } + gap_start = UINT_MAX; + } + CTL_J_GET("arenas.bin.0.size", ®_size, size_t); + CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); + CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.allocated", + &allocated, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", + &nmalloc, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", + &ndalloc, uint64_t); + if (config_tcache) { + CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", + &nrequests, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nfills", + &nfills, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", + &nflushes, uint64_t); + } + CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, + uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, + size_t); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "%13u %5zu %4u %3zu %12zu %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + j, reg_size, nregs, run_size / page, + allocated, nmalloc, ndalloc, nrequests, + nfills, nflushes, nruns, reruns, curruns); + } else { + malloc_cprintf(write_cb, cbopaque, + "%13u %5zu %4u %3zu %12zu %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + j, reg_size, nregs, run_size / page, + allocated, nmalloc, ndalloc, nruns, reruns, + curruns); + } + } + } + if (gap_start != UINT_MAX) { + if (j > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", + gap_start, j - 1); + } else { + /* Gap of one size class. 
*/ + malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); + } + } +} + +static void +stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + size_t page, nlruns, j; + ssize_t gap_start; + + CTL_GET("arenas.page", &page, size_t); + + malloc_cprintf(write_cb, cbopaque, + "large: size pages nmalloc ndalloc nrequests" + " curruns\n"); + CTL_GET("arenas.nlruns", &nlruns, size_t); + for (j = 0, gap_start = -1; j < nlruns; j++) { + uint64_t nmalloc, ndalloc, nrequests; + size_t run_size, curruns; + + CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, + uint64_t); + if (nrequests == 0) { + if (gap_start == -1) + gap_start = j; + } else { + CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, + size_t); + if (gap_start != -1) { + malloc_cprintf(write_cb, cbopaque, "[%zu]\n", + j - gap_start); + gap_start = -1; + } + malloc_cprintf(write_cb, cbopaque, + "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu\n", + run_size, run_size / page, nmalloc, ndalloc, + nrequests, curruns); + } + } + if (gap_start != -1) + malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); +} + +static void +stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i, bool bins, bool large) +{ + unsigned nthreads; + size_t page, pactive, pdirty, mapped; + uint64_t npurge, nmadvise, purged; + size_t small_allocated; + uint64_t small_nmalloc, small_ndalloc, small_nrequests; + size_t large_allocated; + uint64_t large_nmalloc, large_ndalloc, large_nrequests; + + CTL_GET("arenas.page", &page, size_t); + + CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); + CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); + CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); + CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); + CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," + " %"PRIu64" madvise%s, %"PRIu64" purged\n", + pactive, pdirty, npurge, npurge == 1 ? "" : "s", + nmadvise, nmadvise == 1 ? 
"" : "s", purged); + + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc ndalloc nrequests\n"); + CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); + CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); + CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + small_allocated + large_allocated, + small_nmalloc + large_nmalloc, + small_ndalloc + large_ndalloc, + small_nrequests + large_nrequests); + malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); + CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); + malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + + if (bins) + stats_arena_bins_print(write_cb, cbopaque, i); + if (large) + stats_arena_lruns_print(write_cb, cbopaque, i); +} + +void +stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + int err; + uint64_t epoch; + size_t u64sz; + bool general = true; + bool merged = true; + bool unmerged = true; + bool bins = true; + bool large = true; + + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + * */ + epoch = 1; + u64sz = sizeof(uint64_t); + err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write(": Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write(": Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } + + if (write_cb == NULL) { + /* + * The caller did not provide an alternate write_cb callback + * function, so use the default one. malloc_write() is an + * inline function, so use malloc_message() directly here. + */ + write_cb = je_malloc_message; + cbopaque = NULL; + } + + if (opts != NULL) { + unsigned i; + + for (i = 0; opts[i] != '\0'; i++) { + switch (opts[i]) { + case 'g': + general = false; + break; + case 'm': + merged = false; + break; + case 'a': + unmerged = false; + break; + case 'b': + bins = false; + break; + case 'l': + large = false; + break; + default:; + } + } + } + + write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); + if (general) { + int err; + const char *cpv; + bool bv; + unsigned uv; + ssize_t ssv; + size_t sv, bsz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + CTL_GET("config.debug", &bv, bool); + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? 
"enabled" : "disabled"); + +#define OPT_WRITE_BOOL(n) \ + if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } +#define OPT_WRITE_SIZE_T(n) \ + if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } +#define OPT_WRITE_CHAR_P(n) \ + if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \ + == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } + + write_cb(cbopaque, "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_BOOL(junk) + OPT_WRITE_SIZE_T(quarantine) + OPT_WRITE_BOOL(redzone) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(utrace) + OPT_WRITE_BOOL(valgrind) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_BOOL(prof_active) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_leak) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); + + CTL_GET("arenas.narenas", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv); + + malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", + sizeof(void *)); + + CTL_GET("arenas.quantum", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: %u:1\n", + (1U << ssv)); + } else { + write_cb(cbopaque, + "Min active:dirty page ratio per arena: N/A\n"); + } + if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0)) + == 0) { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && + bv) { + CTL_GET("opt.lg_prof_sample", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "Average profile sample interval: %"PRIu64 + " (2^%zu)\n", (((uint64_t)1U) << sv), sv); + + CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Average profile dump interval: %"PRIu64 + " (2^%zd)\n", + (((uint64_t)1U) << ssv), ssv); + } else { + write_cb(cbopaque, + "Average profile dump interval: N/A\n"); + } + } + CTL_GET("opt.lg_chunk", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n", + (ZU(1) << sv), sv); + } + + if (config_stats) { + size_t *cactive; + size_t allocated, active, mapped; + size_t chunks_current, chunks_high; + uint64_t chunks_total; + size_t huge_allocated; + uint64_t huge_nmalloc, huge_ndalloc; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, mapped: %zu\n", 
+ allocated, active, mapped); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", atomic_read_z(cactive)); + + /* Print chunk stats. */ + CTL_GET("stats.chunks.total", &chunks_total, uint64_t); + CTL_GET("stats.chunks.high", &chunks_high, size_t); + CTL_GET("stats.chunks.current", &chunks_current, size_t); + malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " + "highchunks curchunks\n"); + malloc_cprintf(write_cb, cbopaque, " %13"PRIu64"%13zu%13zu\n", + chunks_total, chunks_high, chunks_current); + + /* Print huge stats. */ + CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); + CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); + CTL_GET("stats.huge.allocated", &huge_allocated, size_t); + malloc_cprintf(write_cb, cbopaque, + "huge: nmalloc ndalloc allocated\n"); + malloc_cprintf(write_cb, cbopaque, + " %12"PRIu64" %12"PRIu64" %12zu\n", + huge_nmalloc, huge_ndalloc, huge_allocated); + + if (merged) { + unsigned narenas; + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + bool initialized[narenas]; + size_t isz; + unsigned i, ninitialized; + + isz = sizeof(initialized); + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + if (ninitialized > 1 || unmerged == false) { + /* Print merged arena stats. */ + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + stats_arena_print(write_cb, cbopaque, + narenas, bins, large); + } + } + } + + if (unmerged) { + unsigned narenas; + + /* Print stats for each arena. */ + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + bool initialized[narenas]; + size_t isz; + unsigned i; + + isz = sizeof(initialized); + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + + for (i = 0; i < narenas; i++) { + if (initialized[i]) { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", i); + stats_arena_print(write_cb, + cbopaque, i, bins, large); + } + } + } + } + } + write_cb(cbopaque, "--- End jemalloc statistics ---\n"); +} diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c new file mode 100644 index 000000000000..be26b59ceb32 --- /dev/null +++ b/contrib/jemalloc/src/tcache.c @@ -0,0 +1,435 @@ +#define JEMALLOC_TCACHE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_tsd_data(, tcache, tcache_t *, NULL) +malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default) + +bool opt_tcache = true; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; + +tcache_bin_info_t *tcache_bin_info; +static unsigned stack_nelms; /* Total stack elms per tcache. */ + +size_t nhbins; +size_t tcache_maxclass; + +/******************************************************************************/ + +void * +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +{ + void *ret; + + arena_tcache_fill_small(tcache->arena, tbin, binind, + config_prof ? tcache->prof_accumbytes : 0); + if (config_prof) + tcache->prof_accumbytes = 0; + ret = tcache_alloc_easy(tbin); + + return (ret); +} + +void +tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) +{ + void *ptr; + unsigned i, nflush, ndeferred; + bool merged_stats = false; + + assert(binind < NBINS); + assert(rem <= tbin->ncached); + + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + /* Lock the arena bin associated with the first object. 
*/ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); + arena_t *arena = chunk->arena; + arena_bin_t *bin = &arena->bins[binind]; + + if (config_prof && arena == tcache->arena) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, tcache->prof_accumbytes); + malloc_mutex_unlock(&arena->lock); + tcache->prof_accumbytes = 0; + } + + malloc_mutex_lock(&bin->lock); + if (config_stats && arena == tcache->arena) { + assert(merged_stats == false); + merged_stats = true; + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } + ndeferred = 0; + for (i = 0; i < nflush; i++) { + ptr = tbin->avail[i]; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk->arena == arena) { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_chunk_map_t *mapelm = + &chunk->map[pageind-map_bias]; + if (config_fill && opt_junk) { + arena_alloc_junk_small(ptr, + &arena_bin_info[binind], true); + } + arena_dalloc_bin(arena, chunk, ptr, mapelm); + } else { + /* + * This object was allocated via a different + * arena bin than the one that is currently + * locked. Stash the object, so that it can be + * handled in a future pass. + */ + tbin->avail[ndeferred] = ptr; + ndeferred++; + } + } + malloc_mutex_unlock(&bin->lock); + } + if (config_stats && merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. + */ + arena_bin_t *bin = &tcache->arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&bin->lock); + } + + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); + tbin->ncached = rem; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; +} + +void +tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, + tcache_t *tcache) +{ + void *ptr; + unsigned i, nflush, ndeferred; + bool merged_stats = false; + + assert(binind < nhbins); + assert(rem <= tbin->ncached); + + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + /* Lock the arena associated with the first object. */ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); + arena_t *arena = chunk->arena; + + malloc_mutex_lock(&arena->lock); + if ((config_prof || config_stats) && arena == tcache->arena) { + if (config_prof) { + arena_prof_accum(arena, + tcache->prof_accumbytes); + tcache->prof_accumbytes = 0; + } + if (config_stats) { + merged_stats = true; + arena->stats.nrequests_large += + tbin->tstats.nrequests; + arena->stats.lstats[binind - NBINS].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } + } + ndeferred = 0; + for (i = 0; i < nflush; i++) { + ptr = tbin->avail[i]; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk->arena == arena) + arena_dalloc_large(arena, chunk, ptr); + else { + /* + * This object was allocated via a different + * arena than the one that is currently locked. + * Stash the object, so that it can be handled + * in a future pass. + */ + tbin->avail[ndeferred] = ptr; + ndeferred++; + } + } + malloc_mutex_unlock(&arena->lock); + } + if (config_stats && merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. 
+ */ + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - NBINS].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&arena->lock); + } + + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); + tbin->ncached = rem; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; +} + +void +tcache_arena_associate(tcache_t *tcache, arena_t *arena) +{ + + if (config_stats) { + /* Link into list of extant tcaches. */ + malloc_mutex_lock(&arena->lock); + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&arena->lock); + } + tcache->arena = arena; +} + +void +tcache_arena_dissociate(tcache_t *tcache) +{ + + if (config_stats) { + /* Unlink from list of extant tcaches. */ + malloc_mutex_lock(&tcache->arena->lock); + ql_remove(&tcache->arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&tcache->arena->lock); + tcache_stats_merge(tcache, tcache->arena); + } +} + +tcache_t * +tcache_create(arena_t *arena) +{ + tcache_t *tcache; + size_t size, stack_offset; + unsigned i; + + size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); + /* Naturally align the pointer stacks. */ + size = PTR_CEILING(size); + stack_offset = size; + size += stack_nelms * sizeof(void *); + /* + * Round up to the nearest multiple of the cacheline size, in order to + * avoid the possibility of false cacheline sharing. + * + * That this works relies on the same logic as in ipalloc(), but we + * cannot directly call ipalloc() here due to tcache bootstrapping + * issues. + */ + size = (size + CACHELINE_MASK) & (-CACHELINE); + + if (size <= SMALL_MAXCLASS) + tcache = (tcache_t *)arena_malloc_small(arena, size, true); + else if (size <= tcache_maxclass) + tcache = (tcache_t *)arena_malloc_large(arena, size, true); + else + tcache = (tcache_t *)icalloc(size); + + if (tcache == NULL) + return (NULL); + + tcache_arena_associate(tcache, arena); + + assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); + for (i = 0; i < nhbins; i++) { + tcache->tbins[i].lg_fill_div = 1; + tcache->tbins[i].avail = (void **)((uintptr_t)tcache + + (uintptr_t)stack_offset); + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + } + + tcache_tsd_set(&tcache); + + return (tcache); +} + +void +tcache_destroy(tcache_t *tcache) +{ + unsigned i; + size_t tcache_size; + + tcache_arena_dissociate(tcache); + + for (i = 0; i < NBINS; i++) { + tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_flush_small(tbin, i, 0, tcache); + + if (config_stats && tbin->tstats.nrequests != 0) { + arena_t *arena = tcache->arena; + arena_bin_t *bin = &arena->bins[i]; + malloc_mutex_lock(&bin->lock); + bin->stats.nrequests += tbin->tstats.nrequests; + malloc_mutex_unlock(&bin->lock); + } + } + + for (; i < nhbins; i++) { + tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_flush_large(tbin, i, 0, tcache); + + if (config_stats && tbin->tstats.nrequests != 0) { + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[i - NBINS].nrequests += + tbin->tstats.nrequests; + malloc_mutex_unlock(&arena->lock); + } + } + + if (config_prof && tcache->prof_accumbytes > 0) { + malloc_mutex_lock(&tcache->arena->lock); + arena_prof_accum(tcache->arena, tcache->prof_accumbytes); + malloc_mutex_unlock(&tcache->arena->lock); + } + + tcache_size = 
arena_salloc(tcache, false); + if (tcache_size <= SMALL_MAXCLASS) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> + LG_PAGE; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) << + LG_PAGE)); + arena_bin_t *bin = run->bin; + + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin(arena, chunk, tcache, mapelm); + malloc_mutex_unlock(&bin->lock); + } else if (tcache_size <= tcache_maxclass) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, tcache); + malloc_mutex_unlock(&arena->lock); + } else + idalloc(tcache); +} + +void +tcache_thread_cleanup(void *arg) +{ + tcache_t *tcache = *(tcache_t **)arg; + + if (tcache == TCACHE_STATE_DISABLED) { + /* Do nothing. */ + } else if (tcache == TCACHE_STATE_REINCARNATED) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to + * TCACHE_STATE_PURGATORY in order to receive another callback. + */ + tcache = TCACHE_STATE_PURGATORY; + tcache_tsd_set(&tcache); + } else if (tcache == TCACHE_STATE_PURGATORY) { + /* + * The previous time this destructor was called, we set the key + * to TCACHE_STATE_PURGATORY so that other destructors wouldn't + * cause re-creation of the tcache. This time, do nothing, so + * that the destructor will not be called again. + */ + } else if (tcache != NULL) { + assert(tcache != TCACHE_STATE_PURGATORY); + tcache_destroy(tcache); + tcache = TCACHE_STATE_PURGATORY; + tcache_tsd_set(&tcache); + } +} + +void +tcache_stats_merge(tcache_t *tcache, arena_t *arena) +{ + unsigned i; + + /* Merge and reset tcache stats. */ + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + tcache_bin_t *tbin = &tcache->tbins[i]; + malloc_mutex_lock(&bin->lock); + bin->stats.nrequests += tbin->tstats.nrequests; + malloc_mutex_unlock(&bin->lock); + tbin->tstats.nrequests = 0; + } + + for (; i < nhbins; i++) { + malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; + tcache_bin_t *tbin = &tcache->tbins[i]; + arena->stats.nrequests_large += tbin->tstats.nrequests; + lstats->nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } +} + +bool +tcache_boot0(void) +{ + unsigned i; + + /* + * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * known. + */ + if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; + else if ((1U << opt_lg_tcache_max) > arena_maxclass) + tcache_maxclass = arena_maxclass; + else + tcache_maxclass = (1U << opt_lg_tcache_max); + + nhbins = NBINS + (tcache_maxclass >> LG_PAGE); + + /* Initialize tcache_bin_info. 
*/ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < NBINS; i++) { + if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; + } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; + } + + return (false); +} + +bool +tcache_boot1(void) +{ + + if (tcache_tsd_boot() || tcache_enabled_tsd_boot()) + return (true); + + return (false); +} diff --git a/contrib/jemalloc/src/tsd.c b/contrib/jemalloc/src/tsd.c new file mode 100644 index 000000000000..0838dc867fdb --- /dev/null +++ b/contrib/jemalloc/src/tsd.c @@ -0,0 +1,72 @@ +#define JEMALLOC_TSD_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +static unsigned ncleanups; +static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; + +/******************************************************************************/ + +void * +malloc_tsd_malloc(size_t size) +{ + + /* Avoid choose_arena() in order to dodge bootstrapping issues. */ + return arena_malloc(arenas[0], size, false, false); +} + +void +malloc_tsd_dalloc(void *wrapper) +{ + + idalloc(wrapper); +} + +void +malloc_tsd_no_cleanup(void *arg) +{ + + not_reached(); +} + +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +void +_malloc_thread_cleanup(void) +{ + bool pending[ncleanups], again; + unsigned i; + + for (i = 0; i < ncleanups; i++) + pending[i] = true; + + do { + again = false; + for (i = 0; i < ncleanups; i++) { + if (pending[i]) { + pending[i] = cleanups[i].f(cleanups[i].arg); + if (pending[i]) + again = true; + } + } + } while (again); +} +#endif + +void +malloc_tsd_cleanup_register(bool (*f)(void *), void *arg) +{ + + assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); + cleanups[ncleanups].f = f; + cleanups[ncleanups].arg = arg; + ncleanups++; +} + +void +malloc_tsd_boot(void) +{ + + ncleanups = 0; +} diff --git a/contrib/jemalloc/src/util.c b/contrib/jemalloc/src/util.c new file mode 100644 index 000000000000..8b050422bea4 --- /dev/null +++ b/contrib/jemalloc/src/util.c @@ -0,0 +1,635 @@ +#define assert(e) do { \ + if (config_debug && !(e)) { \ + malloc_write(": Failed assertion\n"); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + if (config_debug) { \ + malloc_write(": Unreachable code reached\n"); \ + abort(); \ + } \ +} while (0) + +#define not_implemented() do { \ + if (config_debug) { \ + malloc_write(": Not implemented\n"); \ + abort(); \ + } \ +} while (0) + +#define JEMALLOC_UTIL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static void wrtmessage(void *cbopaque, const char *s); +#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) +static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, + size_t *slen_p); +#define D2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); +#define O2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); +#define X2S_BUFSIZE (2 + U2S_BUFSIZE) +static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, + size_t *slen_p); + +/******************************************************************************/ + +/* malloc_message() setup. */ +JEMALLOC_CATTR(visibility("hidden"), static) +void +wrtmessage(void *cbopaque, const char *s) +{ + +#ifdef SYS_write + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + */ + UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); +#else + UNUSED int result = write(STDERR_FILENO, s, strlen(s)); +#endif +} + +void (*je_malloc_message)(void *, const char *s) + JEMALLOC_ATTR(visibility("default")) = wrtmessage; + +JEMALLOC_CATTR(visibility("hidden"), static) +void +wrtmessage_1_0(const char *s1, const char *s2, const char *s3, + const char *s4) +{ + + wrtmessage(NULL, s1); + wrtmessage(NULL, s2); + wrtmessage(NULL, s3); + wrtmessage(NULL, s4); +} + +void (*__malloc_message_1_0)(const char *s1, const char *s2, const char *s3, + const char *s4) = wrtmessage_1_0; +__sym_compat(_malloc_message, __malloc_message_1_0, FBSD_1.0); + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. + */ +int +buferror(int errnum, char *buf, size_t buflen) +{ +#ifdef _GNU_SOURCE + char *b = strerror_r(errno, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(errno, buf, buflen)); +#endif +} + +uintmax_t +malloc_strtoumax(const char *nptr, char **endptr, int base) +{ + uintmax_t ret, digit; + int b; + bool neg; + const char *p, *ns; + + if (base < 0 || base == 1 || base > 36) { + errno = EINVAL; + return (UINTMAX_MAX); + } + b = base; + + /* Swallow leading whitespace and get sign, if any. */ + neg = false; + p = nptr; + while (true) { + switch (*p) { + case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + p++; + break; + case '-': + neg = true; + /* Fall through. */ + case '+': + p++; + /* Fall through. */ + default: + goto label_prefix; + } + } + + /* Get prefix, if any. */ + label_prefix: + /* + * Note where the first non-whitespace/sign character is so that it is + * possible to tell whether any digits are consumed (e.g., " 0" vs. + * " -x"). + */ + ns = p; + if (*p == '0') { + switch (p[1]) { + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': + if (b == 0) + b = 8; + if (b == 8) + p++; + break; + case 'x': + switch (p[2]) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + if (b == 0) + b = 16; + if (b == 16) + p += 2; + break; + default: + break; + } + break; + default: + break; + } + } + if (b == 0) + b = 10; + + /* Convert. 
*/ + ret = 0; + while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b) + || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b) + || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) { + uintmax_t pret = ret; + ret *= b; + ret += digit; + if (ret < pret) { + /* Overflow. */ + errno = ERANGE; + return (UINTMAX_MAX); + } + p++; + } + if (neg) + ret = -ret; + + if (endptr != NULL) { + if (p == ns) { + /* No characters were converted. */ + *endptr = (char *)nptr; + } else + *endptr = (char *)p; + } + + return (ret); +} + +static char * +u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) +{ + unsigned i; + + i = U2S_BUFSIZE - 1; + s[i] = '\0'; + switch (base) { + case 10: + do { + i--; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; + } while (x > 0); + break; + case 16: { + const char *digits = (uppercase) + ? "0123456789ABCDEF" + : "0123456789abcdef"; + + do { + i--; + s[i] = digits[x & 0xf]; + x >>= 4; + } while (x > 0); + break; + } default: { + const char *digits = (uppercase) + ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + : "0123456789abcdefghijklmnopqrstuvwxyz"; + + assert(base >= 2 && base <= 36); + do { + i--; + s[i] = digits[x % (uint64_t)base]; + x /= (uint64_t)base; + } while (x > 0); + }} + + *slen_p = U2S_BUFSIZE - 1 - i; + return (&s[i]); +} + +static char * +d2s(intmax_t x, char sign, char *s, size_t *slen_p) +{ + bool neg; + + if ((neg = (x < 0))) + x = -x; + s = u2s(x, 10, false, s, slen_p); + if (neg) + sign = '-'; + switch (sign) { + case '-': + if (neg == false) + break; + /* Fall through. */ + case ' ': + case '+': + s--; + (*slen_p)++; + *s = sign; + break; + default: not_reached(); + } + return (s); +} + +static char * +o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) +{ + + s = u2s(x, 8, false, s, slen_p); + if (alt_form && *s != '0') { + s--; + (*slen_p)++; + *s = '0'; + } + return (s); +} + +static char * +x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) +{ + + s = u2s(x, 16, uppercase, s, slen_p); + if (alt_form) { + s -= 2; + (*slen_p) += 2; + memcpy(s, uppercase ? "0X" : "0x", 2); + } + return (s); +} + +int +malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int ret; + size_t i; + const char *f; + va_list tap; + +#define APPEND_C(c) do { \ + if (i < size) \ + str[i] = (c); \ + i++; \ +} while (0) +#define APPEND_S(s, slen) do { \ + if (i < size) { \ + size_t cpylen = (slen <= size - i) ? slen : size - i; \ + memcpy(&str[i], s, cpylen); \ + } \ + i += slen; \ +} while (0) +#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ + /* Left padding. */ \ + size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ + (size_t)width - slen : 0); \ + if (left_justify == false && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ + /* Value. */ \ + APPEND_S(s, slen); \ + /* Right padding. */ \ + if (left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) \ + APPEND_C(' '); \ + } \ +} while (0) +#define GET_ARG_NUMERIC(val, len) do { \ + switch (len) { \ + case '?': \ + val = va_arg(ap, int); \ + break; \ + case 'l': \ + val = va_arg(ap, long); \ + break; \ + case 'q': \ + val = va_arg(ap, long long); \ + break; \ + case 'j': \ + val = va_arg(ap, intmax_t); \ + break; \ + case 't': \ + val = va_arg(ap, ptrdiff_t); \ + break; \ + case 'z': \ + val = va_arg(ap, ssize_t); \ + break; \ + case 'p': /* Synthetic; used for %p. 
*/ \ + val = va_arg(ap, uintptr_t); \ + break; \ + default: not_reached(); \ + } \ +} while (0) + + if (config_debug) + va_copy(tap, ap); + + i = 0; + f = format; + while (true) { + switch (*f) { + case '\0': goto label_out; + case '%': { + bool alt_form = false; + bool zero_pad = false; + bool left_justify = false; + bool plus_space = false; + bool plus_plus = false; + int prec = -1; + int width = -1; + char len = '?'; + + f++; + if (*f == '%') { + /* %% */ + APPEND_C(*f); + break; + } + /* Flags. */ + while (true) { + switch (*f) { + case '#': + assert(alt_form == false); + alt_form = true; + break; + case '0': + assert(zero_pad == false); + zero_pad = true; + break; + case '-': + assert(left_justify == false); + left_justify = true; + break; + case ' ': + assert(plus_space == false); + plus_space = true; + break; + case '+': + assert(plus_plus == false); + plus_plus = true; + break; + default: goto label_width; + } + f++; + } + /* Width. */ + label_width: + switch (*f) { + case '*': + width = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + uintmax_t uwidth; + errno = 0; + uwidth = malloc_strtoumax(f, (char **)&f, 10); + assert(uwidth != UINTMAX_MAX || errno != + ERANGE); + width = (int)uwidth; + if (*f == '.') { + f++; + goto label_precision; + } else + goto label_length; + break; + } case '.': + f++; + goto label_precision; + default: goto label_length; + } + /* Precision. */ + label_precision: + switch (*f) { + case '*': + prec = va_arg(ap, int); + f++; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + uintmax_t uprec; + errno = 0; + uprec = malloc_strtoumax(f, (char **)&f, 10); + assert(uprec != UINTMAX_MAX || errno != ERANGE); + prec = (int)uprec; + break; + } + default: break; + } + /* Length. */ + label_length: + switch (*f) { + case 'l': + f++; + if (*f == 'l') { + len = 'q'; + f++; + } else + len = 'l'; + break; + case 'j': + len = 'j'; + f++; + break; + case 't': + len = 't'; + f++; + break; + case 'z': + len = 'z'; + f++; + break; + default: break; + } + /* Conversion specifier. */ + switch (*f) { + char *s; + size_t slen; + case 'd': case 'i': { + intmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[D2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = d2s(val, (plus_plus ? '+' : (plus_space ? + ' ' : '-')), buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'o': { + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[O2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = o2s(val, alt_form, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'u': { + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[U2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = u2s(val, 10, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'x': case 'X': { + uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, len); + s = x2s(val, alt_form, *f == 'X', buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } case 'c': { + unsigned char val; + char buf[2]; + + assert(len == '?' || len == 'l'); + assert_not_implemented(len != 'l'); + val = va_arg(ap, int); + buf[0] = val; + buf[1] = '\0'; + APPEND_PADDED_S(buf, 1, width, left_justify); + f++; + break; + } case 's': + assert(len == '?' 
|| len == 'l'); + assert_not_implemented(len != 'l'); + s = va_arg(ap, char *); + slen = (prec == -1) ? strlen(s) : prec; + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + case 'p': { + uintmax_t val; + char buf[X2S_BUFSIZE]; + + GET_ARG_NUMERIC(val, 'p'); + s = x2s(val, true, false, buf, &slen); + APPEND_PADDED_S(s, slen, width, left_justify); + f++; + break; + } + default: not_implemented(); + } + break; + } default: { + APPEND_C(*f); + f++; + break; + }} + } + label_out: + if (i < size) + str[i] = '\0'; + else + str[size - 1] = '\0'; + ret = i; + +#undef APPEND_C +#undef APPEND_S +#undef APPEND_PADDED_S +#undef GET_ARG_NUMERIC + return (ret); +} + +JEMALLOC_ATTR(format(printf, 3, 4)) +int +malloc_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + + va_start(ap, format); + ret = malloc_vsnprintf(str, size, format, ap); + va_end(ap); + + return (ret); +} + +void +malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap) +{ + char buf[MALLOC_PRINTF_BUFSIZE]; + + if (write_cb == NULL) { + /* + * The caller did not provide an alternate write_cb callback + * function, so use the default one. malloc_write() is an + * inline function, so use malloc_message() directly here. + */ + write_cb = je_malloc_message; + cbopaque = NULL; + } + + malloc_vsnprintf(buf, sizeof(buf), format, ap); + write_cb(cbopaque, buf); +} + +/* + * Print to a callback function in such a way as to (hopefully) avoid memory + * allocation. + */ +JEMALLOC_ATTR(format(printf, 3, 4)) +void +malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(write_cb, cbopaque, format, ap); + va_end(ap); +} + +/* Print to stderr in such a way as to avoid memory allocation. */ +JEMALLOC_ATTR(format(printf, 1, 2)) +void +malloc_printf(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); +} diff --git a/contrib/llvm/LICENSE.TXT b/contrib/llvm/LICENSE.TXT index 1b1047ca37db..00cf60116941 100644 --- a/contrib/llvm/LICENSE.TXT +++ b/contrib/llvm/LICENSE.TXT @@ -4,7 +4,7 @@ LLVM Release License University of Illinois/NCSA Open Source License -Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign. All rights reserved. 
Developed by: @@ -67,3 +67,4 @@ Autoconf llvm/autoconf CellSPU backend llvm/lib/Target/CellSPU/README.txt Google Test llvm/utils/unittest/googletest OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} +pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} diff --git a/contrib/llvm/include/llvm-c/Analysis.h b/contrib/llvm/include/llvm-c/Analysis.h index e1e44872b162..f0bdddc50ab7 100644 --- a/contrib/llvm/include/llvm-c/Analysis.h +++ b/contrib/llvm/include/llvm-c/Analysis.h @@ -25,6 +25,12 @@ extern "C" { #endif +/** + * @defgroup LLVMCAnalysis Analysis + * @ingroup LLVMC + * + * @{ + */ typedef enum { LLVMAbortProcessAction, /* verifier will print to stderr and abort() */ @@ -48,6 +54,10 @@ LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action); void LLVMViewFunctionCFG(LLVMValueRef Fn); void LLVMViewFunctionCFGOnly(LLVMValueRef Fn); +/** + * @} + */ + #ifdef __cplusplus } #endif diff --git a/contrib/llvm/include/llvm-c/BitReader.h b/contrib/llvm/include/llvm-c/BitReader.h index 6db66074b31a..522803518398 100644 --- a/contrib/llvm/include/llvm-c/BitReader.h +++ b/contrib/llvm/include/llvm-c/BitReader.h @@ -25,6 +25,12 @@ extern "C" { #endif +/** + * @defgroup LLVMCBitReader Bit Reader + * @ingroup LLVMC + * + * @{ + */ /* Builds a module from the bitcode in the specified memory buffer, returning a reference to the module via the OutModule parameter. Returns 0 on success. @@ -59,6 +65,10 @@ LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage); +/** + * @} + */ + #ifdef __cplusplus } #endif diff --git a/contrib/llvm/include/llvm-c/BitWriter.h b/contrib/llvm/include/llvm-c/BitWriter.h index bcbfb111492a..ba5a6778c942 100644 --- a/contrib/llvm/include/llvm-c/BitWriter.h +++ b/contrib/llvm/include/llvm-c/BitWriter.h @@ -25,6 +25,12 @@ extern "C" { #endif +/** + * @defgroup LLVMCBitWriter Bit Writer + * @ingroup LLVMC + * + * @{ + */ /*===-- Operations on modules ---------------------------------------------===*/ @@ -39,6 +45,10 @@ int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose, descriptor. Returns 0 on success. Closes the Handle. */ int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int Handle); +/** + * @} + */ + #ifdef __cplusplus } #endif diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h index d23b91c4e0de..77746069a25c 100644 --- a/contrib/llvm/include/llvm-c/Core.h +++ b/contrib/llvm/include/llvm-c/Core.h @@ -10,24 +10,6 @@ |* This header declares the C interface to libLLVMCore.a, which implements *| |* the LLVM intermediate representation. *| |* *| -|* LLVM uses a polymorphic type hierarchy which C cannot represent, therefore *| -|* parameters must be passed as base types. Despite the declared types, most *| -|* of the functions provided operate only on branches of the type hierarchy. *| -|* The declared parameter names are descriptive and specify which type is *| -|* required. Additionally, each type hierarchy is documented along with the *| -|* functions that operate upon it. For more detail, refer to LLVM's C++ code. *| -|* If in doubt, refer to Core.cpp, which performs paramter downcasts in the *| -|* form unwrap(Param). *| -|* *| -|* Many exotic languages can interoperate with C code but have a harder time *| -|* with C++ due to name mangling. So in addition to C, this interface enables *| -|* tools written in such languages. 
*| -|* *| -|* When included into a C++ source file, also declares 'wrap' and 'unwrap' *| -|* helpers to perform opaque reference<-->pointer conversions. These helpers *| -|* are shorter and more tightly typed than writing the casts by hand when *| -|* authoring bindings. In assert builds, they will do runtime type checking. *| -|* *| \*===----------------------------------------------------------------------===*/ #ifndef LLVM_C_CORE_H @@ -46,50 +28,121 @@ extern "C" { #endif +/** + * @defgroup LLVMC LLVM-C: C interface to LLVM + * + * This module exposes parts of the LLVM library as a C API. + * + * @{ + */ + +/** + * @defgroup LLVMCTransforms Transforms + */ + +/** + * @defgroup LLVMCCore Core + * + * This module provides an interface to libLLVMCore, which implements + * the LLVM intermediate representation as well as other related types + * and utilities. + * + * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore + * parameters must be passed as base types. Despite the declared types, most + * of the functions provided operate only on branches of the type hierarchy. + * The declared parameter names are descriptive and specify which type is + * required. Additionally, each type hierarchy is documented along with the + * functions that operate upon it. For more detail, refer to LLVM's C++ code. + * If in doubt, refer to Core.cpp, which performs parameter downcasts in the + * form unwrap(Param). + * + * Many exotic languages can interoperate with C code but have a harder time + * with C++ due to name mangling. So in addition to C, this interface enables + * tools written in such languages. + * + * When included into a C++ source file, also declares 'wrap' and 'unwrap' + * helpers to perform opaque reference<-->pointer conversions. These helpers + * are shorter and more tightly typed than writing the casts by hand when + * authoring bindings. In assert builds, they will do runtime type checking. + * + * @{ + */ + +/** + * @defgroup LLVMCCoreTypes Types and Enumerations + * + * @{ + */ typedef int LLVMBool; /* Opaque types. */ /** - * The top-level container for all LLVM global data. See the LLVMContext class. + * The top-level container for all LLVM global data. See the LLVMContext class. */ typedef struct LLVMOpaqueContext *LLVMContextRef; /** * The top-level container for all other LLVM Intermediate Representation (IR) - * objects. See the llvm::Module class. + * objects. + * + * @see llvm::Module */ typedef struct LLVMOpaqueModule *LLVMModuleRef; /** - * Each value in the LLVM IR has a type, an LLVMTypeRef. See the llvm::Type - * class. + * Each value in the LLVM IR has a type, an LLVMTypeRef. + * + * @see llvm::Type */ typedef struct LLVMOpaqueType *LLVMTypeRef; +/** + * Represents an individual value in LLVM IR. + * + * This models llvm::Value. + */ typedef struct LLVMOpaqueValue *LLVMValueRef; + +/** + * Represents a basic block of instructions in LLVM IR. + * + * This models llvm::BasicBlock. + */ typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; + +/** + * Represents an LLVM basic block builder. + * + * This models llvm::IRBuilder. + */ typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; -/* Interface used to provide a module to JIT or interpreter. This is now just a - * synonym for llvm::Module, but we have to keep using the different type to - * keep binary compatibility. +/** + * Interface used to provide a module to JIT or interpreter. 
+ * This is now just a synonym for llvm::Module, but we have to keep using the + * different type to keep binary compatibility. */ typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef; -/* Used to provide a module to JIT or interpreter. - * See the llvm::MemoryBuffer class. +/** + * Used to provide a module to JIT or interpreter. + * + * @see llvm::MemoryBuffer */ typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; -/** See the llvm::PassManagerBase class. */ +/** @see llvm::PassManagerBase */ typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; -/** See the llvm::PassRegistry class. */ +/** @see llvm::PassRegistry */ typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef; -/** Used to get the users and usees of a Value. See the llvm::Use class. */ +/** + * Used to get the users and usees of a Value. + * + * @see llvm::Use */ typedef struct LLVMOpaqueUse *LLVMUseRef; typedef enum { @@ -119,6 +172,11 @@ typedef enum { LLVMReturnsTwice = 1 << 29, LLVMUWTable = 1 << 30, LLVMNonLazyBind = 1 << 31 + + // FIXME: This attribute is currently not included in the C API as + // a temporary measure until the API/ABI impact to the C API is understood + // and the path forward agreed upon. + //LLVMAddressSafety = 1ULL << 32 } LLVMAttribute; typedef enum { @@ -195,14 +253,13 @@ typedef enum { /* Exception Handling Operators */ LLVMResume = 58, - LLVMLandingPad = 59, - LLVMUnwind = 60 - + LLVMLandingPad = 59 } LLVMOpcode; typedef enum { LLVMVoidTypeKind, /**< type with no size */ + LLVMHalfTypeKind, /**< 16 bit floating point type */ LLVMFloatTypeKind, /**< 32 bit floating point type */ LLVMDoubleTypeKind, /**< 64 bit floating point type */ LLVMX86_FP80TypeKind, /**< 80 bit floating point type (X87) */ @@ -294,6 +351,10 @@ typedef enum { LLVMLandingPadFilter /**< A filter clause */ } LLVMLandingPadClauseTy; +/** + * @} + */ + void LLVMInitializeCore(LLVMPassRegistryRef R); @@ -302,49 +363,233 @@ void LLVMInitializeCore(LLVMPassRegistryRef R); void LLVMDisposeMessage(char *Message); -/*===-- Contexts ----------------------------------------------------------===*/ +/** + * @defgroup LLVMCCoreContext Contexts + * + * Contexts are execution states for the core LLVM IR system. + * + * Most types are tied to a context instance. Multiple contexts can + * exist simultaneously. A single context is not thread safe. However, + * different contexts can execute on different threads simultaneously. + * + * @{ + */ -/* Create and destroy contexts. */ +/** + * Create a new context. + * + * Every call to this function should be paired with a call to + * LLVMContextDispose() or the context will leak memory. + */ LLVMContextRef LLVMContextCreate(void); + +/** + * Obtain the global context instance. + */ LLVMContextRef LLVMGetGlobalContext(void); + +/** + * Destroy a context instance. + * + * This should be called for every call to LLVMContextCreate() or memory + * will be leaked. + */ void LLVMContextDispose(LLVMContextRef C); unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name, unsigned SLen); unsigned LLVMGetMDKindID(const char* Name, unsigned SLen); -/*===-- Modules -----------------------------------------------------------===*/ +/** + * @} + */ -/* Create and destroy modules. */ -/** See llvm::Module::Module. */ +/** + * @defgroup LLVMCCoreModule Modules + * + * Modules represent the top-level structure in a LLVM program. An LLVM + * module is effectively a translation unit or a collection of + * translation units merged together. 
+ * + * @{ + */ + +/** + * Create a new, empty module in the global context. + * + * This is equivalent to calling LLVMModuleCreateWithNameInContext with + * LLVMGetGlobalContext() as the context parameter. + * + * Every invocation should be paired with LLVMDisposeModule() or memory + * will be leaked. + */ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID); + +/** + * Create a new, empty module in a specific context. + * + * Every invocation should be paired with LLVMDisposeModule() or memory + * will be leaked. + */ LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, LLVMContextRef C); -/** See llvm::Module::~Module. */ +/** + * Destroy a module instance. + * + * This must be called for every created module or memory will be + * leaked. + */ void LLVMDisposeModule(LLVMModuleRef M); -/** Data layout. See Module::getDataLayout. */ +/** + * Obtain the data layout for a module. + * + * @see Module::getDataLayout() + */ const char *LLVMGetDataLayout(LLVMModuleRef M); + +/** + * Set the data layout for a module. + * + * @see Module::setDataLayout() + */ void LLVMSetDataLayout(LLVMModuleRef M, const char *Triple); -/** Target triple. See Module::getTargetTriple. */ +/** + * Obtain the target triple for a module. + * + * @see Module::getTargetTriple() + */ const char *LLVMGetTarget(LLVMModuleRef M); + +/** + * Set the target triple for a module. + * + * @see Module::setTargetTriple() + */ void LLVMSetTarget(LLVMModuleRef M, const char *Triple); -/** See Module::dump. */ +/** + * Dump a representation of a module to stderr. + * + * @see Module::dump() + */ void LLVMDumpModule(LLVMModuleRef M); -/** See Module::setModuleInlineAsm. */ +/** + * Set inline assembly for a module. + * + * @see Module::setModuleInlineAsm() + */ void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm); -/** See Module::getContext. */ +/** + * Obtain the context to which this module is associated. + * + * @see Module::getContext() + */ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M); -/*===-- Types -------------------------------------------------------------===*/ +/** + * Obtain a Type from a module by its registered name. + */ +LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); -/* LLVM types conform to the following hierarchy: - * +/** + * Obtain the number of operands for named metadata in a module. + * + * @see llvm::Module::getNamedMetadata() + */ +unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name); + +/** + * Obtain the named metadata operands for a module. + * + * The passed LLVMValueRef pointer should refer to an array of + * LLVMValueRef at least LLVMGetNamedMetadataNumOperands long. This + * array will be populated with the LLVMValueRef instances. Each + * instance corresponds to a llvm::MDNode. + * + * @see llvm::Module::getNamedMetadata() + * @see llvm::MDNode::getOperand() + */ +void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest); + +/** + * Add an operand to named metadata. + * + * @see llvm::Module::getNamedMetadata() + * @see llvm::MDNode::addOperand() + */ +void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name, + LLVMValueRef Val); + +/** + * Add a function to a module under a specified name. + * + * @see llvm::Function::Create() + */ +LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name, + LLVMTypeRef FunctionTy); + +/** + * Obtain a Function value from a Module by its name. + * + * The returned value corresponds to a llvm::Function value. 
+ * + * @see llvm::Module::getFunction() + */ +LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name); + +/** + * Obtain an iterator to the first Function in a Module. + * + * @see llvm::Module::begin() + */ +LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M); + +/** + * Obtain an iterator to the last Function in a Module. + * + * @see llvm::Module::end() + */ +LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M); + +/** + * Advance a Function iterator to the next Function. + * + * Returns NULL if the iterator was already at the end and there are no more + * functions. + */ +LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn); + +/** + * Decrement a Function iterator to the previous Function. + * + * Returns NULL if the iterator was already at the beginning and there are + * no previous functions. + */ +LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn); + +/** + * @} + */ + +/** + * @defgroup LLVMCCoreType Types + * + * Types represent the type of a value. + * + * Types are associated with a context instance. The context internally + * deduplicates types so there is only 1 instance of a specific type + * alive at a time. In other words, a unique type is shared among all + * consumers within a context. + * + * A Type in the C API corresponds to llvm::Type. + * + * Types have the following hierarchy: + * * types: * integer type * real type @@ -356,16 +601,44 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M); * void type * label type * opaque type + * + * @{ */ -/** See llvm::LLVMTypeKind::getTypeID. */ +/** + * Obtain the enumerated type of a Type instance. + * + * @see llvm::Type:getTypeID() + */ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty); + +/** + * Whether the type has a known size. + * + * Things that don't have a size are abstract types, labels, and void.a + * + * @see llvm::Type::isSized() + */ LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty); -/** See llvm::LLVMType::getContext. */ +/** + * Obtain the context to which this type instance is associated. + * + * @see llvm::Type::getContext() + */ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty); -/* Operations on integer types */ +/** + * @defgroup LLVMCCoreTypeInt Integer Types + * + * Functions in this section operate on integer types. + * + * @{ + */ + +/** + * Obtain an integer type from a context with specified bit width. + */ LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C); @@ -373,6 +646,10 @@ LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits); +/** + * Obtain an integer type from the global context with a specified bit + * width. + */ LLVMTypeRef LLVMInt1Type(void); LLVMTypeRef LLVMInt8Type(void); LLVMTypeRef LLVMInt16Type(void); @@ -381,68 +658,336 @@ LLVMTypeRef LLVMInt64Type(void); LLVMTypeRef LLVMIntType(unsigned NumBits); unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy); -/* Operations on real types */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreTypeFloat Floating Point Types + * + * @{ + */ + +/** + * Obtain a 16-bit floating point type from a context. + */ +LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C); + +/** + * Obtain a 32-bit floating point type from a context. + */ LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C); + +/** + * Obtain a 64-bit floating point type from a context. 
+ */ LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C); + +/** + * Obtain a 80-bit floating point type (X87) from a context. + */ LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C); + +/** + * Obtain a 128-bit floating point type (112-bit mantissa) from a + * context. + */ LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C); + +/** + * Obtain a 128-bit floating point type (two 64-bits) from a context. + */ LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C); +/** + * Obtain a floating point type from the global context. + * + * These map to the functions in this group of the same name. + */ +LLVMTypeRef LLVMHalfType(void); LLVMTypeRef LLVMFloatType(void); LLVMTypeRef LLVMDoubleType(void); LLVMTypeRef LLVMX86FP80Type(void); LLVMTypeRef LLVMFP128Type(void); LLVMTypeRef LLVMPPCFP128Type(void); -/* Operations on function types */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreTypeFunction Function Types + * + * @{ + */ + +/** + * Obtain a function type consisting of a specified signature. + * + * The function is defined as a tuple of a return Type, a list of + * parameter types, and whether the function is variadic. + */ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, LLVMBool IsVarArg); + +/** + * Returns whether a function type is variadic. + */ LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy); + +/** + * Obtain the Type this function Type returns. + */ LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy); + +/** + * Obtain the number of parameters this function accepts. + */ unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy); + +/** + * Obtain the types of a function's parameters. + * + * The Dest parameter should point to a pre-allocated array of + * LLVMTypeRef at least LLVMCountParamTypes() large. On return, the + * first LLVMCountParamTypes() entries in the array will be populated + * with LLVMTypeRef instances. + * + * @param FunctionTy The function type to operate on. + * @param Dest Memory address of an array to be filled with result. + */ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest); -/* Operations on struct types */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreTypeStruct Structure Types + * + * These functions relate to LLVMTypeRef instances. + * + * @see llvm::StructType + * + * @{ + */ + +/** + * Create a new structure type in a context. + * + * A structure is specified by a list of inner elements/types and + * whether these can be packed together. + * + * @see llvm::StructType::create() + */ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed); + +/** + * Create a new structure type in the global context. + * + * @see llvm::StructType::create() + */ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed); + +/** + * Create an empty structure in a context having a specified name. + * + * @see llvm::StructType::create() + */ LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name); + +/** + * Obtain the name of a structure. + * + * @see llvm::StructType::getName() + */ const char *LLVMGetStructName(LLVMTypeRef Ty); + +/** + * Set the contents of a structure type. + * + * @see llvm::StructType::setBody() + */ void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed); +/** + * Get the number of elements defined inside the structure. 
+ * + * @see llvm::StructType::getNumElements() + */ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy); + +/** + * Get the elements within a structure. + * + * The function is passed the address of a pre-allocated array of + * LLVMTypeRef at least LLVMCountStructElementTypes() long. After + * invocation, this array will be populated with the structure's + * elements. The objects in the destination array will have a lifetime + * of the structure type itself, which is the lifetime of the context it + * is contained in. + */ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest); + +/** + * Determine whether a structure is packed. + * + * @see llvm::StructType::isPacked() + */ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy); + +/** + * Determine whether a structure is opaque. + * + * @see llvm::StructType::isOpaque() + */ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); -LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); +/** + * @} + */ -/* Operations on array, pointer, and vector types (sequence types) */ + +/** + * @defgroup LLVMCCoreTypeSequential Sequential Types + * + * Sequential types represents "arrays" of types. This is a super class + * for array, vector, and pointer types. + * + * @{ + */ + +/** + * Obtain the type of elements within a sequential type. + * + * This works on array, vector, and pointer types. + * + * @see llvm::SequentialType::getElementType() + */ +LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); + +/** + * Create a fixed size array type that refers to a specific type. + * + * The created type will exist in the context that its element type + * exists in. + * + * @see llvm::ArrayType::get() + */ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount); + +/** + * Obtain the length of an array type. + * + * This only works on types that represent arrays. + * + * @see llvm::ArrayType::getNumElements() + */ +unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy); + +/** + * Create a pointer type that points to a defined type. + * + * The created type will exist in the context that its pointee type + * exists in. + * + * @see llvm::PointerType::get() + */ LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace); + +/** + * Obtain the address space of a pointer type. + * + * This only works on types that represent pointers. + * + * @see llvm::PointerType::getAddressSpace() + */ +unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy); + +/** + * Create a vector type that contains a defined type and has a specific + * number of elements. + * + * The created type will exist in the context thats its element type + * exists in. + * + * @see llvm::VectorType::get() + */ LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount); -LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); -unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy); -unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy); +/** + * Obtain the number of elements in a vector type. + * + * This only works on types that represent vectors. + * + * @see llvm::VectorType::getNumElements() + */ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy); -/* Operations on other types */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreTypeOther Other Types + * + * @{ + */ + +/** + * Create a void type in a context. + */ LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C); + +/** + * Create a label type in a context. + */ LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C); + +/** + * Create a X86 MMX type in a context. 
+ */ LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C); +/** + * These are similar to the above functions except they operate on the + * global context. + */ LLVMTypeRef LLVMVoidType(void); LLVMTypeRef LLVMLabelType(void); LLVMTypeRef LLVMX86MMXType(void); -/*===-- Values ------------------------------------------------------------===*/ +/** + * @} + */ -/* The bulk of LLVM's object model consists of values, which comprise a very +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValues Values + * + * The bulk of LLVM's object model consists of values, which comprise a very * rich type hierarchy. + * + * LLVMValueRef essentially represents llvm::Value. There is a rich + * hierarchy of classes within this type. Depending on the instance + * obtain, not all APIs are available. + * + * Callers can determine the type of a LLVMValueRef by calling the + * LLVMIsA* family of functions (e.g. LLVMIsAArgument()). These + * functions are defined by a macro, so it isn't obvious which are + * available by looking at the Doxygen source code. Instead, look at the + * source definition of LLVM_FOR_EACH_VALUE_SUBCLASS and note the list + * of value names given. These value names also correspond to classes in + * the llvm::Value hierarchy. + * + * @{ */ #define LLVM_FOR_EACH_VALUE_SUBCLASS(macro) \ @@ -473,8 +1018,6 @@ LLVMTypeRef LLVMX86MMXType(void); macro(IntrinsicInst) \ macro(DbgInfoIntrinsic) \ macro(DbgDeclareInst) \ - macro(EHExceptionInst) \ - macro(EHSelectorInst) \ macro(MemIntrinsic) \ macro(MemCpyInst) \ macro(MemMoveInst) \ @@ -518,92 +1061,399 @@ LLVMTypeRef LLVMX86MMXType(void); macro(LoadInst) \ macro(VAArgInst) -/* Operations on all values */ -LLVMTypeRef LLVMTypeOf(LLVMValueRef Val); -const char *LLVMGetValueName(LLVMValueRef Val); -void LLVMSetValueName(LLVMValueRef Val, const char *Name); -void LLVMDumpValue(LLVMValueRef Val); -void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal); -int LLVMHasMetadata(LLVMValueRef Val); -LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID); -void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node); +/** + * @defgroup LLVMCCoreValueGeneral General APIs + * + * Functions in this section work on all LLVMValueRef instances, + * regardless of their sub-type. They correspond to functions available + * on llvm::Value. + * + * @{ + */ -/* Conversion functions. Return the input value if it is an instance of the - specified class, otherwise NULL. See llvm::dyn_cast_or_null<>. */ +/** + * Obtain the type of a value. + * + * @see llvm::Value::getType() + */ +LLVMTypeRef LLVMTypeOf(LLVMValueRef Val); + +/** + * Obtain the string name of a value. + * + * @see llvm::Value::getName() + */ +const char *LLVMGetValueName(LLVMValueRef Val); + +/** + * Set the string name of a value. + * + * @see llvm::Value::setName() + */ +void LLVMSetValueName(LLVMValueRef Val, const char *Name); + +/** + * Dump a representation of a value to stderr. + * + * @see llvm::Value::dump() + */ +void LLVMDumpValue(LLVMValueRef Val); + +/** + * Replace all uses of a value with another one. + * + * @see llvm::Value::replaceAllUsesWith() + */ +void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal); + +/** + * Determine whether the specified constant instance is constant. + */ +LLVMBool LLVMIsConstant(LLVMValueRef Val); + +/** + * Determine whether a value instance is undefined. + */ +LLVMBool LLVMIsUndef(LLVMValueRef Val); + +/** + * Convert value instances between types. + * + * Internally, a LLVMValueRef is "pinned" to a specific type. 
This + * series of functions allows you to cast an instance to a specific + * type. + * + * If the cast is not valid for the specified type, NULL is returned. + * + * @see llvm::dyn_cast_or_null<> + */ #define LLVM_DECLARE_VALUE_CAST(name) \ LLVMValueRef LLVMIsA##name(LLVMValueRef Val); LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST) -/* Operations on Uses */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueUses Usage + * + * This module defines functions that allow you to inspect the uses of a + * LLVMValueRef. + * + * It is possible to obtain a LLVMUseRef for any LLVMValueRef instance. + * Each LLVMUseRef (which corresponds to a llvm::Use instance) holds a + * llvm::User and llvm::Value. + * + * @{ + */ + +/** + * Obtain the first use of a value. + * + * Uses are obtained in an iterator fashion. First, call this function + * to obtain a reference to the first use. Then, call LLVMGetNextUse() + * on that instance and all subsequently obtained instances untl + * LLVMGetNextUse() returns NULL. + * + * @see llvm::Value::use_begin() + */ LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val); + +/** + * Obtain the next use of a value. + * + * This effectively advances the iterator. It returns NULL if you are on + * the final use and no more are available. + */ LLVMUseRef LLVMGetNextUse(LLVMUseRef U); + +/** + * Obtain the user value for a user. + * + * The returned value corresponds to a llvm::User type. + * + * @see llvm::Use::getUser() + */ LLVMValueRef LLVMGetUser(LLVMUseRef U); + +/** + * Obtain the value this use corresponds to. + * + * @see llvm::Use::get(). + */ LLVMValueRef LLVMGetUsedValue(LLVMUseRef U); -/* Operations on Users */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueUser User value + * + * Function in this group pertain to LLVMValueRef instances that descent + * from llvm::User. This includes constants, instructions, and + * operators. + * + * @{ + */ + +/** + * Obtain an operand at a specific index in a llvm::User value. + * + * @see llvm::User::getOperand() + */ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index); + +/** + * Set an operand at a specific index in a llvm::User value. + * + * @see llvm::User::setOperand() + */ void LLVMSetOperand(LLVMValueRef User, unsigned Index, LLVMValueRef Val); + +/** + * Obtain the number of operands in a llvm::User value. + * + * @see llvm::User::getNumOperands() + */ int LLVMGetNumOperands(LLVMValueRef Val); -/* Operations on constants of any type */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueConstant Constants + * + * This section contains APIs for interacting with LLVMValueRef that + * correspond to llvm::Constant instances. + * + * These functions will work for any LLVMValueRef in the llvm::Constant + * class hierarchy. + * + * @{ + */ + +/** + * Obtain a constant value referring to the null instance of a type. + * + * @see llvm::Constant::getNullValue() + */ LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */ -LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */ + +/** + * Obtain a constant value referring to the instance of a type + * consisting of all ones. + * + * This is only valid for integer types. + * + * @see llvm::Constant::getAllOnesValue() + */ +LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); + +/** + * Obtain a constant value referring to an undefined value of a type. + * + * @see llvm::UndefValue::get() + */ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty); -LLVMBool LLVMIsConstant(LLVMValueRef Val); + +/** + * Determine whether a value instance is null. 
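As a rough illustration of how the general value APIs and the use-list iterators above fit together, a minimal sketch in C, assuming V is an LLVMValueRef obtained elsewhere (the helper name is made up for illustration):

#include <llvm-c/Core.h>
#include <stdio.h>

/* Print some general properties of V and then walk its use list. */
static void describe_value(LLVMValueRef V) {
  printf("value '%s' (constant: %d, undef: %d)\n",
         LLVMGetValueName(V), LLVMIsConstant(V), LLVMIsUndef(V));

  /* LLVMGetFirstUse() starts the iteration; LLVMGetNextUse() advances it
     until NULL is returned. */
  LLVMUseRef U;
  for (U = LLVMGetFirstUse(V); U != NULL; U = LLVMGetNextUse(U)) {
    LLVMValueRef User = LLVMGetUser(U);       /* the llvm::User side */
    LLVMValueRef Used = LLVMGetUsedValue(U);  /* V itself */
    printf("  used by '%s' (operands: %d)\n",
           LLVMGetValueName(User), LLVMGetNumOperands(User));
    (void)Used;
  }
}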
+ * + * @see llvm::Constant::isNullValue() + */ LLVMBool LLVMIsNull(LLVMValueRef Val); -LLVMBool LLVMIsUndef(LLVMValueRef Val); + +/** + * Obtain a constant that is a constant pointer pointing to NULL for a + * specified type. + */ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty); -/* Operations on metadata */ -LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, - unsigned SLen); -LLVMValueRef LLVMMDString(const char *Str, unsigned SLen); -LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, - unsigned Count); -LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count); -const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len); -int LLVMGetMDNodeNumOperands(LLVMValueRef V); -LLVMValueRef *LLVMGetMDNodeOperand(LLVMValueRef V, unsigned i); -unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name); -void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest); +/** + * @defgroup LLVMCCoreValueConstantScalar Scalar constants + * + * Functions in this group model LLVMValueRef instances that correspond + * to constants referring to scalar types. + * + * For integer types, the LLVMTypeRef parameter should correspond to a + * llvm::IntegerType instance and the returned LLVMValueRef will + * correspond to a llvm::ConstantInt. + * + * For floating point types, the LLVMTypeRef returned corresponds to a + * llvm::ConstantFP. + * + * @{ + */ -/* Operations on scalar constants */ +/** + * Obtain a constant value for an integer type. + * + * The returned value corresponds to a llvm::ConstantInt. + * + * @see llvm::ConstantInt::get() + * + * @param IntTy Integer type to obtain value of. + * @param N The value the returned instance should refer to. + * @param SignExtend Whether to sign extend the produced value. + */ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, LLVMBool SignExtend); + +/** + * Obtain a constant value for an integer of arbitrary precision. + * + * @see llvm::ConstantInt::get() + */ LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy, unsigned NumWords, const uint64_t Words[]); + +/** + * Obtain a constant value for an integer parsed from a string. + * + * A similar API, LLVMConstIntOfStringAndSize is also available. If the + * string's length is available, it is preferred to call that function + * instead. + * + * @see llvm::ConstantInt::get() + */ LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text, uint8_t Radix); + +/** + * Obtain a constant value for an integer parsed from a string with + * specified length. + * + * @see llvm::ConstantInt::get() + */ LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text, unsigned SLen, uint8_t Radix); + +/** + * Obtain a constant value referring to a double floating point value. + */ LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N); + +/** + * Obtain a constant for a floating point value parsed from a string. + * + * A similar API, LLVMConstRealOfStringAndSize is also available. It + * should be used if the input string's length is known. + */ LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text); + +/** + * Obtain a constant for a floating point value parsed from a string. + */ LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text, unsigned SLen); + +/** + * Obtain the zero extended value for an integer constant value. 
+ * + * @see llvm::ConstantInt::getZExtValue() + */ unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal); + +/** + * Obtain the sign extended value for an integer constant value. + * + * @see llvm::ConstantInt::getSExtValue() + */ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal); +/** + * @} + */ -/* Operations on composite constants */ +/** + * @defgroup LLVMCCoreValueConstantComposite Composite Constants + * + * Functions in this group operate on composite constants. + * + * @{ + */ + +/** + * Create a ConstantDataSequential and initialize it with a string. + * + * @see llvm::ConstantDataArray::getString() + */ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, unsigned Length, LLVMBool DontNullTerminate); -LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, + +/** + * Create a ConstantDataSequential with string content in the global context. + * + * This is the same as LLVMConstStringInContext except it operates on the + * global context. + * + * @see LLVMConstStringInContext() + * @see llvm::ConstantDataArray::getString() + */ +LLVMValueRef LLVMConstString(const char *Str, unsigned Length, + LLVMBool DontNullTerminate); + +/** + * Create an anonymous ConstantStruct with the specified values. + * + * @see llvm::ConstantStruct::getAnon() + */ +LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed); -LLVMValueRef LLVMConstString(const char *Str, unsigned Length, - LLVMBool DontNullTerminate); -LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, - LLVMValueRef *ConstantVals, unsigned Length); +/** + * Create a ConstantStruct in the global Context. + * + * This is the same as LLVMConstStructInContext except it operates on the + * global Context. + * + * @see LLVMConstStructInContext() + */ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed); + +/** + * Create a ConstantArray from values. + * + * @see llvm::ConstantArray::get() + */ +LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, + LLVMValueRef *ConstantVals, unsigned Length); + +/** + * Create a non-anonymous ConstantStruct from values. + * + * @see llvm::ConstantStruct::get() + */ LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy, LLVMValueRef *ConstantVals, unsigned Count); + +/** + * Create a ConstantVector from values. + * + * @see llvm::ConstantVector::get() + */ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size); -/* Constant expressions */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueConstantExpressions Constant Expressions + * + * Functions in this group correspond to APIs on llvm::ConstantExpr. + * + * @see llvm::ConstantExpr. + * + * @{ + */ LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal); LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty); LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty); @@ -690,7 +1540,21 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, LLVMBool HasSideEffects, LLVMBool IsAlignStack); LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB); -/* Operations on global variables, functions, and aliases (globals) */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueConstantGlobals Global Values + * + * This group contains functions that operate on global values. Functions in + * this group relate to functions in the llvm::GlobalValue class tree. 
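Taken together, the scalar and composite constant constructors above let whole constant aggregates be built from C. A small sketch using the global context (all names are illustrative):

#include <llvm-c/Core.h>

/* Build { i32 42, double 3.5 } plus a [3 x i32] array constant. */
static void build_constants(void) {
  LLVMValueRef I = LLVMConstInt(LLVMInt32Type(), 42, /*SignExtend=*/0);
  LLVMValueRef D = LLVMConstReal(LLVMDoubleType(), 3.5);

  LLVMValueRef Fields[] = { I, D };
  LLVMValueRef S = LLVMConstStruct(Fields, 2, /*Packed=*/0);

  LLVMValueRef Elems[] = {
    LLVMConstInt(LLVMInt32Type(), 1, 0),
    LLVMConstInt(LLVMInt32Type(), 2, 0),
    LLVMConstInt(LLVMInt32Type(), 3, 0)
  };
  LLVMValueRef A = LLVMConstArray(LLVMInt32Type(), Elems, 3);

  /* Integer constants can be read back, e.g. 42 here. */
  unsigned long long N = LLVMConstIntGetZExtValue(I);
  (void)N; (void)S; (void)A;
}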
+ * + * @see llvm::GlobalValue + * + * @{ + */ + LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global); LLVMBool LLVMIsDeclaration(LLVMValueRef Global); LLVMLinkage LLVMGetLinkage(LLVMValueRef Global); @@ -702,7 +1566,15 @@ void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz); unsigned LLVMGetAlignment(LLVMValueRef Global); void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes); -/* Operations on global variables */ +/** + * @defgroup LLVMCoreValueConstantGlobalVariable Global Variables + * + * This group contains functions that operate on global variable values. + * + * @see llvm::GlobalVariable + * + * @{ + */ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name, @@ -720,110 +1592,672 @@ void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal); LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar); void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant); -/* Operations on aliases */ +/** + * @} + */ + +/** + * @defgroup LLVMCoreValueConstantGlobalAlias Global Aliases + * + * This group contains function that operate on global alias values. + * + * @see llvm::GlobalAlias + * + * @{ + */ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name); -/* Operations on functions */ -LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name, - LLVMTypeRef FunctionTy); -LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name); -LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M); -LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M); -LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn); -LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn); +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueFunction Function values + * + * Functions in this group operate on LLVMValueRef instances that + * correspond to llvm::Function instances. + * + * @see llvm::Function + * + * @{ + */ + +/** + * Remove a function from its containing module and deletes it. + * + * @see llvm::Function::eraseFromParent() + */ void LLVMDeleteFunction(LLVMValueRef Fn); + +/** + * Obtain the ID number from a function instance. + * + * @see llvm::Function::getIntrinsicID() + */ unsigned LLVMGetIntrinsicID(LLVMValueRef Fn); + +/** + * Obtain the calling function of a function. + * + * The returned value corresponds to the LLVMCallConv enumeration. + * + * @see llvm::Function::getCallingConv() + */ unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn); + +/** + * Set the calling convention of a function. + * + * @see llvm::Function::setCallingConv() + * + * @param Fn Function to operate on + * @param CC LLVMCallConv to set calling convention to + */ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC); + +/** + * Obtain the name of the garbage collector to use during code + * generation. + * + * @see llvm::Function::getGC() + */ const char *LLVMGetGC(LLVMValueRef Fn); + +/** + * Define the garbage collector to use during code generation. + * + * @see llvm::Function::setGC() + */ void LLVMSetGC(LLVMValueRef Fn, const char *Name); + +/** + * Add an attribute to a function. + * + * @see llvm::Function::addAttribute() + */ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); + +/** + * Obtain an attribute from a function. + * + * @see llvm::Function::getAttributes() + */ LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn); + +/** + * Remove an attribute from a function. 
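The global-value functions in this group are typically combined when populating a module. A hedged sketch, assuming M is an existing module (the symbol names and attribute choices are illustrative only):

#include <llvm-c/Core.h>

static LLVMValueRef add_counter_and_helper(LLVMModuleRef M) {
  /* Global variable: i32 counter = 0, internal linkage. */
  LLVMValueRef G = LLVMAddGlobal(M, LLVMInt32Type(), "counter");
  LLVMSetInitializer(G, LLVMConstInt(LLVMInt32Type(), 0, 0));
  LLVMSetLinkage(G, LLVMInternalLinkage);
  LLVMSetGlobalConstant(G, 0);

  /* Function value: i32 helper(i32), fastcc, marked nounwind. */
  LLVMTypeRef Params[] = { LLVMInt32Type() };
  LLVMTypeRef FnTy = LLVMFunctionType(LLVMInt32Type(), Params, 1, 0);
  LLVMValueRef Fn = LLVMAddFunction(M, "helper", FnTy);
  LLVMSetFunctionCallConv(Fn, LLVMFastCallConv);
  LLVMAddFunctionAttr(Fn, LLVMNoUnwindAttribute);
  return Fn;
}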
+ */ void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); -/* Operations on parameters */ +/** + * @defgroup LLVMCCoreValueFunctionParameters Function Parameters + * + * Functions in this group relate to arguments/parameters on functions. + * + * Functions in this group expect LLVMValueRef instances that correspond + * to llvm::Function instances. + * + * @{ + */ + +/** + * Obtain the number of parameters in a function. + * + * @see llvm::Function::arg_size() + */ unsigned LLVMCountParams(LLVMValueRef Fn); + +/** + * Obtain the parameters in a function. + * + * The takes a pointer to a pre-allocated array of LLVMValueRef that is + * at least LLVMCountParams() long. This array will be filled with + * LLVMValueRef instances which correspond to the parameters the + * function receives. Each LLVMValueRef corresponds to a llvm::Argument + * instance. + * + * @see llvm::Function::arg_begin() + */ void LLVMGetParams(LLVMValueRef Fn, LLVMValueRef *Params); + +/** + * Obtain the parameter at the specified index. + * + * Parameters are indexed from 0. + * + * @see llvm::Function::arg_begin() + */ LLVMValueRef LLVMGetParam(LLVMValueRef Fn, unsigned Index); + +/** + * Obtain the function to which this argument belongs. + * + * Unlike other functions in this group, this one takes a LLVMValueRef + * that corresponds to a llvm::Attribute. + * + * The returned LLVMValueRef is the llvm::Function to which this + * argument belongs. + */ LLVMValueRef LLVMGetParamParent(LLVMValueRef Inst); + +/** + * Obtain the first parameter to a function. + * + * @see llvm::Function::arg_begin() + */ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn); + +/** + * Obtain the last parameter to a function. + * + * @see llvm::Function::arg_end() + */ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn); + +/** + * Obtain the next parameter to a function. + * + * This takes a LLVMValueRef obtained from LLVMGetFirstParam() (which is + * actually a wrapped iterator) and obtains the next parameter from the + * underlying iterator. + */ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg); + +/** + * Obtain the previous parameter to a function. + * + * This is the opposite of LLVMGetNextParam(). + */ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg); + +/** + * Add an attribute to a function argument. + * + * @see llvm::Argument::addAttr() + */ void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA); + +/** + * Remove an attribute from a function argument. + * + * @see llvm::Argument::removeAttr() + */ void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA); + +/** + * Get an attribute from a function argument. + */ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg); + +/** + * Set the alignment for a function parameter. + * + * @see llvm::Argument::addAttr() + * @see llvm::Attribute::constructAlignmentFromInt() + */ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align); -/* Operations on basic blocks */ +/** + * @} + */ + +/** + * @} + */ + +/** + * @} + */ + +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueMetadata Metadata + * + * @{ + */ + +/** + * Obtain a MDString value from a context. + * + * The returned instance corresponds to the llvm::MDString class. + * + * The instance is specified by string data of a specified length. The + * string content is copied, so the backing memory can be freed after + * this function returns. + */ +LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, + unsigned SLen); + +/** + * Obtain a MDString value from the global context. 
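A short sketch of the parameter-iteration pattern these declarations describe, assuming Fn is a function value created elsewhere:

#include <llvm-c/Core.h>
#include <stdio.h>

static void list_params(LLVMValueRef Fn) {
  unsigned i, Count = LLVMCountParams(Fn);
  printf("%s has %u parameter(s)\n", LLVMGetValueName(Fn), Count);

  /* Parameters are indexed from 0. */
  for (i = 0; i < Count; ++i) {
    LLVMValueRef Arg = LLVMGetParam(Fn, i);
    printf("  arg %u: %s\n", i, LLVMGetValueName(Arg));
  }

  /* The same parameters can also be walked as a list. */
  LLVMValueRef Arg;
  for (Arg = LLVMGetFirstParam(Fn); Arg != NULL; Arg = LLVMGetNextParam(Arg))
    LLVMDumpValue(Arg);
}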
+ */ +LLVMValueRef LLVMMDString(const char *Str, unsigned SLen); + +/** + * Obtain a MDNode value from a context. + * + * The returned value corresponds to the llvm::MDNode class. + */ +LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, + unsigned Count); + +/** + * Obtain a MDNode value from the global context. + */ +LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count); + +/** + * Obtain the underlying string from a MDString value. + * + * @param V Instance to obtain string from. + * @param Len Memory address which will hold length of returned string. + * @return String data in MDString. + */ +const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len); + +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueBasicBlock Basic Block + * + * A basic block represents a single entry single exit section of code. + * Basic blocks contain a list of instructions which form the body of + * the block. + * + * Basic blocks belong to functions. They have the type of label. + * + * Basic blocks are themselves values. However, the C API models them as + * LLVMBasicBlockRef. + * + * @see llvm::BasicBlock + * + * @{ + */ + +/** + * Convert a basic block instance to a value type. + */ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB); + +/** + * Determine whether a LLVMValueRef is itself a basic block. + */ LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val); + +/** + * Convert a LLVMValueRef to a LLVMBasicBlockRef instance. + */ LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val); + +/** + * Obtain the function to which a basic block belongs. + * + * @see llvm::BasicBlock::getParent() + */ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB); + +/** + * Obtain the terminator instruction for a basic block. + * + * If the basic block does not have a terminator (it is not well-formed + * if it doesn't), then NULL is returned. + * + * The returned LLVMValueRef corresponds to a llvm::TerminatorInst. + * + * @see llvm::BasicBlock::getTerminator() + */ LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB); + +/** + * Obtain the number of basic blocks in a function. + * + * @param Fn Function value to operate on. + */ unsigned LLVMCountBasicBlocks(LLVMValueRef Fn); + +/** + * Obtain all of the basic blocks in a function. + * + * This operates on a function value. The BasicBlocks parameter is a + * pointer to a pre-allocated array of LLVMBasicBlockRef of at least + * LLVMCountBasicBlocks() in length. This array is populated with + * LLVMBasicBlockRef instances. + */ void LLVMGetBasicBlocks(LLVMValueRef Fn, LLVMBasicBlockRef *BasicBlocks); + +/** + * Obtain the first basic block in a function. + * + * The returned basic block can be used as an iterator. You will likely + * eventually call into LLVMGetNextBasicBlock() with it. + * + * @see llvm::Function::begin() + */ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn); + +/** + * Obtain the last basic block in a function. + * + * @see llvm::Function::end() + */ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn); + +/** + * Advance a basic block iterator. + */ LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB); + +/** + * Go backwards in a basic block iterator. + */ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB); + +/** + * Obtain the basic block that corresponds to the entry point of a + * function. + * + * @see llvm::Function::getEntryBlock() + */ LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn); +/** + * Append a basic block to the end of a function. 
+ * + * @see llvm::BasicBlock::Create() + */ LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, LLVMValueRef Fn, const char *Name); + +/** + * Append a basic block to the end of a function using the global + * context. + * + * @see llvm::BasicBlock::Create() + */ +LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name); + +/** + * Insert a basic block in a function before another basic block. + * + * The function to add to is determined by the function of the + * passed basic block. + * + * @see llvm::BasicBlock::Create() + */ LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C, LLVMBasicBlockRef BB, const char *Name); -LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name); +/** + * Insert a basic block in a function using the global context. + * + * @see llvm::BasicBlock::Create() + */ LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB, const char *Name); + +/** + * Remove a basic block from a function and delete it. + * + * This deletes the basic block from its containing function and deletes + * the basic block itself. + * + * @see llvm::BasicBlock::eraseFromParent() + */ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BB); + +/** + * Remove a basic block from a function. + * + * This deletes the basic block from its containing function but keep + * the basic block alive. + * + * @see llvm::BasicBlock::removeFromParent() + */ void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BB); +/** + * Move a basic block to before another one. + * + * @see llvm::BasicBlock::moveBefore() + */ void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos); + +/** + * Move a basic block to after another one. + * + * @see llvm::BasicBlock::moveAfter() + */ void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos); +/** + * Obtain the first instruction in a basic block. + * + * The returned LLVMValueRef corresponds to a llvm::Instruction + * instance. + */ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB); + +/** + * Obtain the last instruction in a basic block. + * + * The returned LLVMValueRef corresponds to a LLVM:Instruction. + */ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB); -/* Operations on instructions */ +/** + * @} + */ + +/** + * @defgroup LLVMCCoreValueInstruction Instructions + * + * Functions in this group relate to the inspection and manipulation of + * individual instructions. + * + * In the C++ API, an instruction is modeled by llvm::Instruction. This + * class has a large number of descendents. llvm::Instruction is a + * llvm::Value and in the C API, instructions are modeled by + * LLVMValueRef. + * + * This group also contains sub-groups which operate on specific + * llvm::Instruction types, e.g. llvm::CallInst. + * + * @{ + */ + +/** + * Determine whether an instruction has any metadata attached. + */ +int LLVMHasMetadata(LLVMValueRef Val); + +/** + * Return metadata associated with an instruction value. + */ +LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID); + +/** + * Set metadata associated with an instruction value. + */ +void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node); + +/** + * Obtain the basic block to which an instruction belongs. + * + * @see llvm::Instruction::getParent() + */ LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst); + +/** + * Obtain the instruction that occurs after the one specified. + * + * The next instruction will be from the same basic block. 
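The block and instruction iterators above combine into the usual nested walk over a function. A minimal sketch, assuming Fn is an LLVMValueRef for a function:

#include <llvm-c/Core.h>
#include <stdio.h>

static void walk_function(LLVMValueRef Fn) {
  LLVMBasicBlockRef BB;
  printf("%s: %u block(s)\n", LLVMGetValueName(Fn), LLVMCountBasicBlocks(Fn));

  for (BB = LLVMGetFirstBasicBlock(Fn); BB != NULL;
       BB = LLVMGetNextBasicBlock(BB)) {
    /* Each block is itself a value with label type. */
    LLVMValueRef BlockVal = LLVMBasicBlockAsValue(BB);
    printf(" block %s\n", LLVMGetValueName(BlockVal));

    /* Instructions are iterated the same way, first to last. */
    LLVMValueRef Inst;
    for (Inst = LLVMGetFirstInstruction(BB); Inst != NULL;
         Inst = LLVMGetNextInstruction(Inst))
      LLVMDumpValue(Inst);
  }
}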
+ * + * If this is the last instruction in a basic block, NULL will be + * returned. + */ LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst); + +/** + * Obtain the instruction that occured before this one. + * + * If the instruction is the first instruction in a basic block, NULL + * will be returned. + */ LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst); + +/** + * Remove and delete an instruction. + * + * The instruction specified is removed from its containing building + * block and then deleted. + * + * @see llvm::Instruction::eraseFromParent() + */ void LLVMInstructionEraseFromParent(LLVMValueRef Inst); + +/** + * Obtain the code opcode for an individual instruction. + * + * @see llvm::Instruction::getOpCode() + */ LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst); + +/** + * Obtain the predicate of an instruction. + * + * This is only valid for instructions that correspond to llvm::ICmpInst + * or llvm::ConstantExpr whose opcode is llvm::Instruction::ICmp. + * + * @see llvm::ICmpInst::getPredicate() + */ LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst); -/* Operations on call sites */ +/** + * @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations + * + * Functions in this group apply to instructions that refer to call + * sites and invocations. These correspond to C++ types in the + * llvm::CallInst class tree. + * + * @{ + */ + +/** + * Set the calling convention for a call instruction. + * + * This expects an LLVMValueRef that corresponds to a llvm::CallInst or + * llvm::InvokeInst. + * + * @see llvm::CallInst::setCallingConv() + * @see llvm::InvokeInst::setCallingConv() + */ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC); + +/** + * Obtain the calling convention for a call instruction. + * + * This is the opposite of LLVMSetInstructionCallConv(). Reads its + * usage. + * + * @see LLVMSetInstructionCallConv() + */ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr); + + void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute); -void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, +void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute); -void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, +void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align); -/* Operations on call instructions (only) */ +/** + * Obtain whether a call instruction is a tail call. + * + * This only works on llvm::CallInst instructions. + * + * @see llvm::CallInst::isTailCall() + */ LLVMBool LLVMIsTailCall(LLVMValueRef CallInst); + +/** + * Set whether a call instruction is a tail call. + * + * This only works on llvm::CallInst instructions. + * + * @see llvm::CallInst::setTailCall() + */ void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall); -/* Operations on switch instructions (only) */ +/** + * @} + */ + +/** + * Obtain the default destination basic block of a switch instruction. + * + * This only works on llvm::SwitchInst instructions. + * + * @see llvm::SwitchInst::getDefaultDest() + */ LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef SwitchInstr); -/* Operations on phi nodes */ +/** + * @defgroup LLVMCCoreValueInstructionPHINode PHI Nodes + * + * Functions in this group only apply to instructions that map to + * llvm::PHINode instances. + * + * @{ + */ + +/** + * Add an incoming value to the end of a PHI list. 
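As a sketch of the call-site helpers, the cast and iteration functions can be used to find call instructions inside a block and adjust them (BB is assumed to come from the walk shown earlier; whether a tail-call marking is actually legal depends on the surrounding IR):

#include <llvm-c/Core.h>

static void mark_calls_as_tail(LLVMBasicBlockRef BB) {
  LLVMValueRef Inst;
  for (Inst = LLVMGetFirstInstruction(BB); Inst != NULL;
       Inst = LLVMGetNextInstruction(Inst)) {
    /* LLVMIsACallInst() returns the value itself for calls, NULL otherwise. */
    if (LLVMIsACallInst(Inst) != NULL) {
      LLVMSetInstructionCallConv(Inst, LLVMCCallConv);
      if (!LLVMIsTailCall(Inst))
        LLVMSetTailCall(Inst, 1);
    }
  }
}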
+ */ void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues, LLVMBasicBlockRef *IncomingBlocks, unsigned Count); + +/** + * Obtain the number of incoming basic blocks to a PHI node. + */ unsigned LLVMCountIncoming(LLVMValueRef PhiNode); + +/** + * Obtain an incoming value to a PHI node as a LLVMValueRef. + */ LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index); + +/** + * Obtain an incoming value to a PHI node as a LLVMBasicBlockRef. + */ LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index); -/*===-- Instruction builders ----------------------------------------------===*/ +/** + * @} + */ -/* An instruction builder represents a point within a basic block, and is the - * exclusive means of building instructions using the C interface. +/** + * @} + */ + +/** + * @} + */ + +/** + * @defgroup LLVMCCoreInstructionBuilder Instruction Builders + * + * An instruction builder represents a point within a basic block and is + * the exclusive means of building instructions using the C interface. + * + * @{ */ LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C); @@ -964,6 +2398,8 @@ LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, const char *Name); LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name); +LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst); +void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, @@ -1044,21 +2480,37 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); +/** + * @} + */ -/*===-- Module providers --------------------------------------------------===*/ +/** + * @defgroup LLVMCCoreModuleProvider Module Providers + * + * @{ + */ -/* Changes the type of M so it can be passed to FunctionPassManagers and the +/** + * Changes the type of M so it can be passed to FunctionPassManagers and the * JIT. They take ModuleProviders for historical reasons. */ LLVMModuleProviderRef LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M); -/* Destroys the module M. +/** + * Destroys the module M. */ void LLVMDisposeModuleProvider(LLVMModuleProviderRef M); +/** + * @} + */ -/*===-- Memory buffers ----------------------------------------------------===*/ +/** + * @defgroup LLVMCCoreMemoryBuffers Memory Buffers + * + * @{ + */ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, LLVMMemoryBufferRef *OutMemBuf, @@ -1067,23 +2519,39 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, char **OutMessage); void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); -/*===-- Pass Registry -----------------------------------------------------===*/ +/** + * @} + */ + +/** + * @defgroup LLVMCCorePassRegistry Pass Registry + * + * @{ + */ /** Return the global pass registry, for use with initialization functions. - See llvm::PassRegistry::getPassRegistry. */ + @see llvm::PassRegistry::getPassRegistry */ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void); -/*===-- Pass Managers -----------------------------------------------------===*/ +/** + * @} + */ + +/** + * @defgroup LLVMCCorePassManagers Pass Managers + * + * @{ + */ /** Constructs a new whole-module pass pipeline. This type of pipeline is suitable for link-time optimization and whole-module transformations. - See llvm::PassManager::PassManager. 
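A compact sketch tying the PHI helpers to the builder interface declared below: it emits i32 abs32(i32) into a module M, using a conditional branch and a PHI node to merge the two paths (all names are illustrative):

#include <llvm-c/Core.h>

/* Emit: i32 abs32(i32 x) { return x < 0 ? -x : x; } */
static LLVMValueRef emit_abs32(LLVMModuleRef M) {
  LLVMTypeRef I32 = LLVMInt32Type();
  LLVMTypeRef FnTy = LLVMFunctionType(I32, &I32, 1, 0);
  LLVMValueRef Fn = LLVMAddFunction(M, "abs32", FnTy);
  LLVMValueRef X = LLVMGetParam(Fn, 0);

  LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(Fn, "entry");
  LLVMBasicBlockRef Neg   = LLVMAppendBasicBlock(Fn, "neg");
  LLVMBasicBlockRef Done  = LLVMAppendBasicBlock(Fn, "done");

  LLVMBuilderRef B = LLVMCreateBuilder();
  LLVMPositionBuilderAtEnd(B, Entry);
  LLVMValueRef IsNeg = LLVMBuildICmp(B, LLVMIntSLT, X,
                                     LLVMConstInt(I32, 0, 0), "isneg");
  LLVMBuildCondBr(B, IsNeg, Neg, Done);

  LLVMPositionBuilderAtEnd(B, Neg);
  LLVMValueRef NegX = LLVMBuildNeg(B, X, "negx");
  LLVMBuildBr(B, Done);

  LLVMPositionBuilderAtEnd(B, Done);
  LLVMValueRef Phi = LLVMBuildPhi(B, I32, "result");
  LLVMValueRef Incoming[]  = { NegX, X };
  LLVMBasicBlockRef From[] = { Neg, Entry };
  LLVMAddIncoming(Phi, Incoming, From, 2);
  LLVMBuildRet(B, Phi);

  LLVMDisposeBuilder(B);
  return Fn;
}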
*/ + @see llvm::PassManager::PassManager */ LLVMPassManagerRef LLVMCreatePassManager(void); /** Constructs a new function-by-function pass pipeline over the module provider. It does not take ownership of the module provider. This type of pipeline is suitable for code generation and JIT compilation tasks. - See llvm::FunctionPassManager::FunctionPassManager. */ + @see llvm::FunctionPassManager::FunctionPassManager */ LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M); /** Deprecated: Use LLVMCreateFunctionPassManagerForModule instead. */ @@ -1091,30 +2559,42 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP); /** Initializes, executes on the provided module, and finalizes all of the passes scheduled in the pass manager. Returns 1 if any of the passes - modified the module, 0 otherwise. See llvm::PassManager::run(Module&). */ + modified the module, 0 otherwise. + @see llvm::PassManager::run(Module&) */ LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M); /** Initializes all of the function passes scheduled in the function pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. - See llvm::FunctionPassManager::doInitialization. */ + @see llvm::FunctionPassManager::doInitialization */ LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM); /** Executes all of the function passes scheduled in the function pass manager on the provided function. Returns 1 if any of the passes modified the function, false otherwise. - See llvm::FunctionPassManager::run(Function&). */ + @see llvm::FunctionPassManager::run(Function&) */ LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F); /** Finalizes all of the function passes scheduled in in the function pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. - See llvm::FunctionPassManager::doFinalization. */ + @see llvm::FunctionPassManager::doFinalization */ LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM); /** Frees the memory of a pass pipeline. For function pipelines, does not free the module provider. - See llvm::PassManagerBase::~PassManagerBase. */ + @see llvm::PassManagerBase::~PassManagerBase. */ void LLVMDisposePassManager(LLVMPassManagerRef PM); +/** + * @} + */ + +/** + * @} + */ + +/** + * @} + */ #ifdef __cplusplus } diff --git a/contrib/llvm/include/llvm-c/Disassembler.h b/contrib/llvm/include/llvm-c/Disassembler.h index bf2f2767cd34..a676e37768e4 100644 --- a/contrib/llvm/include/llvm-c/Disassembler.h +++ b/contrib/llvm/include/llvm-c/Disassembler.h @@ -18,6 +18,13 @@ #include "llvm/Support/DataTypes.h" #include +/** + * @defgroup LLVMCDisassembler Disassembler + * @ingroup LLVMC + * + * @{ + */ + /** * An opaque reference to a disassembler context. */ @@ -157,6 +164,10 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize); +/** + * @} + */ + #ifdef __cplusplus } #endif /* !defined(__cplusplus) */ diff --git a/contrib/llvm/include/llvm-c/EnhancedDisassembly.h b/contrib/llvm/include/llvm-c/EnhancedDisassembly.h index 0c173c2b1999..71a0d496c028 100644 --- a/contrib/llvm/include/llvm-c/EnhancedDisassembly.h +++ b/contrib/llvm/include/llvm-c/EnhancedDisassembly.h @@ -25,6 +25,19 @@ extern "C" { #endif +/** + * @defgroup LLVMCEnhancedDisassembly Enhanced Disassembly + * @ingroup LLVMC + * @deprecated + * + * This module contains an interface to the Enhanced Disassembly (edis) + * library. 
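A sketch of the pass-manager lifecycle described here, assuming module M already exists and pulling a couple of scalar passes from llvm-c/Transforms/Scalar.h:

#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>

static void optimize_module(LLVMModuleRef M) {
  /* Function-by-function pipeline: initialize, run per function, finalize. */
  LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
  LLVMAddPromoteMemoryToRegisterPass(FPM);
  LLVMAddInstructionCombiningPass(FPM);

  LLVMInitializeFunctionPassManager(FPM);
  LLVMValueRef Fn;
  for (Fn = LLVMGetFirstFunction(M); Fn != NULL; Fn = LLVMGetNextFunction(Fn))
    LLVMRunFunctionPassManager(FPM, Fn);
  LLVMFinalizeFunctionPassManager(FPM);
  LLVMDisposePassManager(FPM);

  /* Whole-module pipeline. */
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddGVNPass(PM);
  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}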
The edis library is deprecated and will likely disappear in + * the near future. You should use the @ref LLVMCDisassembler interface + * instead. + * + * @{ + */ + /*! @typedef EDByteReaderCallback Interface to memory from which instructions may be read. @@ -504,6 +517,10 @@ int EDBlockEvaluateOperand(uint64_t *result, int EDBlockVisitTokens(EDInstRef inst, EDTokenVisitor_t visitor); +/** + * @} + */ + #endif #ifdef __cplusplus diff --git a/contrib/llvm/include/llvm-c/ExecutionEngine.h b/contrib/llvm/include/llvm-c/ExecutionEngine.h index f5f40619ef0d..cb77bb2e2e23 100644 --- a/contrib/llvm/include/llvm-c/ExecutionEngine.h +++ b/contrib/llvm/include/llvm-c/ExecutionEngine.h @@ -26,6 +26,13 @@ extern "C" { #endif +/** + * @defgroup LLVMCExecutionEngine Execution Engine + * @ingroup LLVMC + * + * @{ + */ + void LLVMLinkInJIT(void); void LLVMLinkInInterpreter(void); @@ -125,6 +132,10 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global); +/** + * @} + */ + #ifdef __cplusplus } diff --git a/contrib/llvm/include/llvm-c/Initialization.h b/contrib/llvm/include/llvm-c/Initialization.h index 3b59abbec03c..cb3ab9e3f393 100644 --- a/contrib/llvm/include/llvm-c/Initialization.h +++ b/contrib/llvm/include/llvm-c/Initialization.h @@ -22,9 +22,19 @@ extern "C" { #endif +/** + * @defgroup LLVMCInitialization Initialization Routines + * @ingroup LLVMC + * + * This module contains routines used to initialize the LLVM system. + * + * @{ + */ + void LLVMInitializeCore(LLVMPassRegistryRef R); void LLVMInitializeTransformUtils(LLVMPassRegistryRef R); void LLVMInitializeScalarOpts(LLVMPassRegistryRef R); +void LLVMInitializeVectorization(LLVMPassRegistryRef R); void LLVMInitializeInstCombine(LLVMPassRegistryRef R); void LLVMInitializeIPO(LLVMPassRegistryRef R); void LLVMInitializeInstrumentation(LLVMPassRegistryRef R); @@ -33,6 +43,10 @@ void LLVMInitializeIPA(LLVMPassRegistryRef R); void LLVMInitializeCodeGen(LLVMPassRegistryRef R); void LLVMInitializeTarget(LLVMPassRegistryRef R); +/** + * @} + */ + #ifdef __cplusplus } #endif diff --git a/contrib/llvm/include/llvm-c/LinkTimeOptimizer.h b/contrib/llvm/include/llvm-c/LinkTimeOptimizer.h index fca394681c76..5338d3fc4c85 100644 --- a/contrib/llvm/include/llvm-c/LinkTimeOptimizer.h +++ b/contrib/llvm/include/llvm-c/LinkTimeOptimizer.h @@ -20,6 +20,13 @@ extern "C" { #endif +/** + * @defgroup LLVMCLinkTimeOptimizer Link Time Optimization + * @ingroup LLVMC + * + * @{ + */ + /// This provides a dummy type for pointers to the LTO object. 
typedef void* llvm_lto_t; @@ -51,6 +58,10 @@ extern "C" { extern llvm_lto_status_t llvm_optimize_modules (llvm_lto_t lto, const char* output_filename); +/** + * @} + */ + #ifdef __cplusplus } #endif diff --git a/contrib/llvm/include/llvm-c/Object.h b/contrib/llvm/include/llvm-c/Object.h index 7b1cf717f777..e2dad62b4e07 100644 --- a/contrib/llvm/include/llvm-c/Object.h +++ b/contrib/llvm/include/llvm-c/Object.h @@ -28,23 +28,74 @@ extern "C" { #endif +/** + * @defgroup LLVMCObject Object file reading and writing + * @ingroup LLVMC + * + * @{ + */ +// Opaque type wrappers typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef; - typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef; +typedef struct LLVMOpaqueSymbolIterator *LLVMSymbolIteratorRef; +typedef struct LLVMOpaqueRelocationIterator *LLVMRelocationIteratorRef; +// ObjectFile creation LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf); void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile); +// ObjectFile Section iterators LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile); void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI); LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile, LLVMSectionIteratorRef SI); void LLVMMoveToNextSection(LLVMSectionIteratorRef SI); +void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, + LLVMSymbolIteratorRef Sym); + +// ObjectFile Symbol iterators +LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile); +void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI); +LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile, + LLVMSymbolIteratorRef SI); +void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI); + +// SectionRef accessors const char *LLVMGetSectionName(LLVMSectionIteratorRef SI); uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI); const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI); +uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI); +LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, + LLVMSymbolIteratorRef Sym); +// Section Relocation iterators +LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section); +void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef RI); +LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section, + LLVMRelocationIteratorRef RI); +void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef RI); + + +// SymbolRef accessors +const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI); +uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI); +uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI); +uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI); + +// RelocationRef accessors +uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI); +uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI); +LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI); +uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI); +// NOTE: Caller takes ownership of returned string of the two +// following functions. 
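A sketch of the object-file reading flow the new Object.h iterators support; the path is a placeholder and error handling is abbreviated:

#include <llvm-c/Core.h>
#include <llvm-c/Object.h>
#include <stdio.h>

static void dump_sections(const char *Path) {
  LLVMMemoryBufferRef Buf;
  char *Err = NULL;
  if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Err)) {
    fprintf(stderr, "cannot read %s: %s\n", Path, Err ? Err : "unknown error");
    LLVMDisposeMessage(Err);
    return;
  }

  LLVMObjectFileRef Obj = LLVMCreateObjectFile(Buf); /* Obj now owns Buf */
  LLVMSectionIteratorRef SI = LLVMGetSections(Obj);
  while (!LLVMIsSectionIteratorAtEnd(Obj, SI)) {
    printf("%-20s %llu bytes\n", LLVMGetSectionName(SI),
           (unsigned long long)LLVMGetSectionSize(SI));
    LLVMMoveToNextSection(SI);
  }
  LLVMDisposeSectionIterator(SI);
  LLVMDisposeObjectFile(Obj);
}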
+const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI); +const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI); + +/** + * @} + */ #ifdef __cplusplus } @@ -68,6 +119,27 @@ namespace llvm { return reinterpret_cast (const_cast(SI)); } + + inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) { + return reinterpret_cast(SI); + } + + inline LLVMSymbolIteratorRef + wrap(const symbol_iterator *SI) { + return reinterpret_cast + (const_cast(SI)); + } + + inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) { + return reinterpret_cast(SI); + } + + inline LLVMRelocationIteratorRef + wrap(const relocation_iterator *SI) { + return reinterpret_cast + (const_cast(SI)); + } + } } diff --git a/contrib/llvm/include/llvm-c/Target.h b/contrib/llvm/include/llvm-c/Target.h index 7afaef15c419..568e60dfb43e 100644 --- a/contrib/llvm/include/llvm-c/Target.h +++ b/contrib/llvm/include/llvm-c/Target.h @@ -26,6 +26,13 @@ extern "C" { #endif +/** + * @defgroup LLVMCTarget Target information + * @ingroup LLVMC + * + * @{ + */ + enum LLVMByteOrdering { LLVMBigEndian, LLVMLittleEndian }; typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef; @@ -47,6 +54,24 @@ typedef struct LLVMStructLayout *LLVMStructLayoutRef; #include "llvm/Config/Targets.def" #undef LLVM_TARGET /* Explicit undef to make SWIG happier */ +/* Declare all of the available assembly printer initialization functions. */ +#define LLVM_ASM_PRINTER(TargetName) \ + void LLVMInitialize##TargetName##AsmPrinter(); +#include "llvm/Config/AsmPrinters.def" +#undef LLVM_ASM_PRINTER /* Explicit undef to make SWIG happier */ + +/* Declare all of the available assembly parser initialization functions. */ +#define LLVM_ASM_PARSER(TargetName) \ + void LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/AsmParsers.def" +#undef LLVM_ASM_PARSER /* Explicit undef to make SWIG happier */ + +/* Declare all of the available disassembler initialization functions. */ +#define LLVM_DISASSEMBLER(TargetName) \ + void LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/Disassemblers.def" +#undef LLVM_DISASSEMBLER /* Explicit undef to make SWIG happier */ + /** LLVMInitializeAllTargetInfos - The main program should call this function if it wants access to all available targets that LLVM is configured to support. */ @@ -64,6 +89,43 @@ static inline void LLVMInitializeAllTargets(void) { #include "llvm/Config/Targets.def" #undef LLVM_TARGET /* Explicit undef to make SWIG happier */ } + +/** LLVMInitializeAllTargetMCs - The main program should call this function if + it wants access to all available target MC that LLVM is configured to + support. */ +static inline void LLVMInitializeAllTargetMCs(void) { +#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetMC(); +#include "llvm/Config/Targets.def" +#undef LLVM_TARGET /* Explicit undef to make SWIG happier */ +} + +/** LLVMInitializeAllAsmPrinters - The main program should call this function if + it wants all asm printers that LLVM is configured to support, to make them + available via the TargetRegistry. */ +static inline void LLVMInitializeAllAsmPrinters() { +#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter(); +#include "llvm/Config/AsmPrinters.def" +#undef LLVM_ASM_PRINTER /* Explicit undef to make SWIG happier */ +} + +/** LLVMInitializeAllAsmParsers - The main program should call this function if + it wants all asm parsers that LLVM is configured to support, to make them + available via the TargetRegistry. 
*/ +static inline void LLVMInitializeAllAsmParsers() { +#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/AsmParsers.def" +#undef LLVM_ASM_PARSER /* Explicit undef to make SWIG happier */ +} + +/** LLVMInitializeAllDisassemblers - The main program should call this function + if it wants all disassemblers that LLVM is configured to support, to make + them available via the TargetRegistry. */ +static inline void LLVMInitializeAllDisassemblers() { +#define LLVM_DISASSEMBLER(TargetName) \ + LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/Disassemblers.def" +#undef LLVM_DISASSEMBLER /* Explicit undef to make SWIG happier */ +} /** LLVMInitializeNativeTarget - The main program should call this function to initialize the native target corresponding to the host. This is useful @@ -157,6 +219,9 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef, LLVMTypeRef StructTy, See the destructor llvm::TargetData::~TargetData. */ void LLVMDisposeTargetData(LLVMTargetDataRef); +/** + * @} + */ #ifdef __cplusplus } diff --git a/contrib/llvm/include/llvm-c/TargetMachine.h b/contrib/llvm/include/llvm-c/TargetMachine.h new file mode 100644 index 000000000000..0d35d73a11df --- /dev/null +++ b/contrib/llvm/include/llvm-c/TargetMachine.h @@ -0,0 +1,142 @@ +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to the Target and TargetMachine *| +|* classes, which can be used to generate assembly or object files. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TARGETMACHINE_H +#define LLVM_C_TARGETMACHINE_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif +typedef struct LLVMTargetMachine *LLVMTargetMachineRef; +typedef struct LLVMTarget *LLVMTargetRef; + +typedef enum { + LLVMCodeGenLevelNone, + LLVMCodeGenLevelLess, + LLVMCodeGenLevelDefault, + LLVMCodeGenLevelAggressive +} LLVMCodeGenOptLevel; + +typedef enum { + LLVMRelocDefault, + LLVMRelocStatic, + LLVMRelocPIC, + LLVMRelocDynamicNoPic +} LLVMRelocMode; + +typedef enum { + LLVMCodeModelDefault, + LLVMCodeModelJITDefault, + LLVMCodeModelSmall, + LLVMCodeModelKernel, + LLVMCodeModelMedium, + LLVMCodeModelLarge +} LLVMCodeModel; + +typedef enum { + LLVMAssemblyFile, + LLVMObjectFile +} LLVMCodeGenFileType; + +/** Returns the first llvm::Target in the registered targets list. */ +LLVMTargetRef LLVMGetFirstTarget(); +/** Returns the next llvm::Target given a previous one (or null if there's none) */ +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T); + +/*===-- Target ------------------------------------------------------------===*/ +/** Returns the name of a target. See llvm::Target::getName */ +const char *LLVMGetTargetName(LLVMTargetRef T); + +/** Returns the description of a target. 
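A sketch of how the initialization helpers and the new TargetMachine.h enumeration functions are meant to be used together:

#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
#include <stdio.h>

static void list_targets(void) {
  /* Register everything this copy of LLVM was configured with. */
  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargets();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllAsmPrinters();

  LLVMTargetRef T;
  for (T = LLVMGetFirstTarget(); T != NULL; T = LLVMGetNextTarget(T))
    printf("%-12s %s (JIT: %s)\n", LLVMGetTargetName(T),
           LLVMGetTargetDescription(T), LLVMTargetHasJIT(T) ? "yes" : "no");
}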
See llvm::Target::getDescription */ +const char *LLVMGetTargetDescription(LLVMTargetRef T); + +/** Returns if the target has a JIT */ +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T); + +/** Returns if the target has a TargetMachine associated */ +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T); + +/** Returns if the target as an ASM backend (required for emitting output) */ +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T); + +/*===-- Target Machine ----------------------------------------------------===*/ +/** Creates a new llvm::TargetMachine. See llvm::Target::createTargetMachine */ +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple, + char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel); + +/** Dispose the LLVMTargetMachineRef instance generated by + LLVMCreateTargetMachine. */ +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T); + +/** Returns the Target used in a TargetMachine */ +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T); + +/** Returns the triple used creating this target machine. See + llvm::TargetMachine::getTriple. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineTriple(LLVMTargetMachineRef T); + +/** Returns the cpu used creating this target machine. See + llvm::TargetMachine::getCPU. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); + +/** Returns the feature string used creating this target machine. See + llvm::TargetMachine::getFeatureString. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); + +/** Returns the llvm::TargetData used for this llvm:TargetMachine. */ +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); + +/** Emits an asm or object file for the given module to the filename. This + wraps several c++ only classes (among them a file stream). Returns any + error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. */ +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage); + + + + +#ifdef __cplusplus +} + +namespace llvm { + class TargetMachine; + class Target; + + inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast(P); + } + inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast(P); + } + inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast( + const_cast(P)); + } + inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast(const_cast(P)); + } +} +#endif + +#endif diff --git a/contrib/llvm/include/llvm-c/Transforms/IPO.h b/contrib/llvm/include/llvm-c/Transforms/IPO.h index 710bebe598be..448078012eac 100644 --- a/contrib/llvm/include/llvm-c/Transforms/IPO.h +++ b/contrib/llvm/include/llvm-c/Transforms/IPO.h @@ -21,6 +21,13 @@ extern "C" { #endif +/** + * @defgroup LLVMCTransformsIPO Interprocedural transformations + * @ingroup LLVMCTransforms + * + * @{ + */ + /** See llvm::createArgumentPromotionPass function. */ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM); @@ -63,6 +70,10 @@ void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM); /** See llvm::createStripSymbolsPass function. 
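Continuing the previous sketch, a hedged example of driving code generation through the new TargetMachine C API; the triple, CPU string, and output file name are placeholders:

#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <stdio.h>

static int emit_object(LLVMTargetRef T, LLVMModuleRef M) {
  LLVMTargetMachineRef TM = LLVMCreateTargetMachine(
      T, "x86_64-unknown-freebsd10.0", "generic", "",
      LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);

  char *Err = NULL;
  if (LLVMTargetMachineEmitToFile(TM, M, "out.o", LLVMObjectFile, &Err)) {
    fprintf(stderr, "codegen failed: %s\n", Err);
    LLVMDisposeMessage(Err);
    LLVMDisposeTargetMachine(TM);
    return 1;
  }
  LLVMDisposeTargetMachine(TM);
  return 0;
}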
*/ void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM); +/** + * @} + */ + #ifdef __cplusplus } #endif /* defined(__cplusplus) */ diff --git a/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h b/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h index fa722c95874d..cee6e5a0ee08 100644 --- a/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h +++ b/contrib/llvm/include/llvm-c/Transforms/PassManagerBuilder.h @@ -23,6 +23,13 @@ typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef; extern "C" { #endif +/** + * @defgroup LLVMCTransformsPassManagerBuilder Pass manager builder + * @ingroup LLVMCTransforms + * + * @{ + */ + /** See llvm::PassManagerBuilder. */ LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void); void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB); @@ -73,6 +80,10 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, bool Internalize, bool RunInliner); +/** + * @} + */ + #ifdef __cplusplus } diff --git a/contrib/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm/include/llvm-c/Transforms/Scalar.h index 6015ef90eed2..a2c4d6116f03 100644 --- a/contrib/llvm/include/llvm-c/Transforms/Scalar.h +++ b/contrib/llvm/include/llvm-c/Transforms/Scalar.h @@ -25,6 +25,13 @@ extern "C" { #endif +/** + * @defgroup LLVMCTransformsScalar Scalar transformations + * @ingroup LLVMCTransforms + * + * @{ + */ + /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); @@ -116,6 +123,9 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM); /** See llvm::createBasicAliasAnalysisPass function */ void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM); +/** + * @} + */ #ifdef __cplusplus } diff --git a/contrib/llvm/include/llvm-c/Transforms/Vectorize.h b/contrib/llvm/include/llvm-c/Transforms/Vectorize.h new file mode 100644 index 000000000000..9e7c7540d766 --- /dev/null +++ b/contrib/llvm/include/llvm-c/Transforms/Vectorize.h @@ -0,0 +1,48 @@ +/*===---------------------------Vectorize.h --------------------- -*- C -*-===*\ +|*===----------- Vectorization Transformation Library C Interface ---------===*| +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMVectorize.a, which *| +|* implements various vectorization transformations of the LLVM IR. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TRANSFORMS_VECTORIZE_H +#define LLVM_C_TRANSFORMS_VECTORIZE_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup LLVMCTransformsVectorize Vectorization transformations + * @ingroup LLVMCTransforms + * + * @{ + */ + +/** See llvm::createBBVectorizePass function. 
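A sketch combining the pass manager builder with the new basic-block vectorization pass, assuming M is an existing module:

#include <llvm-c/Core.h>
#include <llvm-c/Transforms/PassManagerBuilder.h>
#include <llvm-c/Transforms/Vectorize.h>

static void run_standard_pipeline(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();

  /* Basic-block vectorization is opt-in; add it explicitly. */
  LLVMAddBBVectorizePass(PM);

  /* Populate an -O2 style module pipeline from the builder. */
  LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
  LLVMPassManagerBuilderSetOptLevel(PMB, 2);
  LLVMPassManagerBuilderPopulateModulePassManager(PMB, PM);
  LLVMPassManagerBuilderDispose(PMB);

  LLVMRunPassManager(PM, M);
  LLVMDisposePassManager(PM);
}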
*/ +void LLVMAddBBVectorizePass(LLVMPassManagerRef PM); + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif /* defined(__cplusplus) */ + +#endif + diff --git a/contrib/llvm/include/llvm-c/lto.h b/contrib/llvm/include/llvm-c/lto.h index 7ea7ad01a211..5d9cecbc5153 100644 --- a/contrib/llvm/include/llvm-c/lto.h +++ b/contrib/llvm/include/llvm-c/lto.h @@ -20,24 +20,31 @@ #include #include +/** + * @defgroup LLVMCLTO LTO + * @ingroup LLVMC + * + * @{ + */ + #define LTO_API_VERSION 4 typedef enum { LTO_SYMBOL_ALIGNMENT_MASK = 0x0000001F, /* log2 of alignment */ - LTO_SYMBOL_PERMISSIONS_MASK = 0x000000E0, - LTO_SYMBOL_PERMISSIONS_CODE = 0x000000A0, - LTO_SYMBOL_PERMISSIONS_DATA = 0x000000C0, - LTO_SYMBOL_PERMISSIONS_RODATA = 0x00000080, - LTO_SYMBOL_DEFINITION_MASK = 0x00000700, - LTO_SYMBOL_DEFINITION_REGULAR = 0x00000100, - LTO_SYMBOL_DEFINITION_TENTATIVE = 0x00000200, - LTO_SYMBOL_DEFINITION_WEAK = 0x00000300, - LTO_SYMBOL_DEFINITION_UNDEFINED = 0x00000400, + LTO_SYMBOL_PERMISSIONS_MASK = 0x000000E0, + LTO_SYMBOL_PERMISSIONS_CODE = 0x000000A0, + LTO_SYMBOL_PERMISSIONS_DATA = 0x000000C0, + LTO_SYMBOL_PERMISSIONS_RODATA = 0x00000080, + LTO_SYMBOL_DEFINITION_MASK = 0x00000700, + LTO_SYMBOL_DEFINITION_REGULAR = 0x00000100, + LTO_SYMBOL_DEFINITION_TENTATIVE = 0x00000200, + LTO_SYMBOL_DEFINITION_WEAK = 0x00000300, + LTO_SYMBOL_DEFINITION_UNDEFINED = 0x00000400, LTO_SYMBOL_DEFINITION_WEAKUNDEF = 0x00000500, - LTO_SYMBOL_SCOPE_MASK = 0x00003800, - LTO_SYMBOL_SCOPE_INTERNAL = 0x00000800, - LTO_SYMBOL_SCOPE_HIDDEN = 0x00001000, - LTO_SYMBOL_SCOPE_PROTECTED = 0x00002000, + LTO_SYMBOL_SCOPE_MASK = 0x00003800, + LTO_SYMBOL_SCOPE_INTERNAL = 0x00000800, + LTO_SYMBOL_SCOPE_HIDDEN = 0x00001000, + LTO_SYMBOL_SCOPE_PROTECTED = 0x00002000, LTO_SYMBOL_SCOPE_DEFAULT = 0x00001800, LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN = 0x00002800 } lto_symbol_attributes; @@ -88,7 +95,7 @@ lto_module_is_object_file(const char* path); * Checks if a file is a loadable object compiled for requested target. */ extern bool -lto_module_is_object_file_for_target(const char* path, +lto_module_is_object_file_for_target(const char* path, const char* target_triple_prefix); @@ -103,7 +110,7 @@ lto_module_is_object_file_in_memory(const void* mem, size_t length); * Checks if a buffer is a loadable object compiled for requested target. */ extern bool -lto_module_is_object_file_in_memory_for_target(const void* mem, size_t length, +lto_module_is_object_file_in_memory_for_target(const void* mem, size_t length, const char* target_triple_prefix); @@ -243,6 +250,12 @@ extern void lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args, int nargs); +/** + * Enables the internalize pass during LTO optimizations. + */ +extern void +lto_codegen_set_whole_program_optimization(lto_code_gen_t cg); + /** * Adds to a list of all global symbols that must exist in the final * generated code. If a function is not listed, it might be @@ -251,7 +264,6 @@ lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args, extern void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol); - /** * Writes a new object file at the specified path that contains the * merged contents of all modules added so far. @@ -260,11 +272,10 @@ lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol); extern bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path); - /** * Generates code for all added modules into one native object file. 
* On success returns a pointer to a generated mach-o/ELF buffer and - * length set to the buffer size. The buffer is owned by the + * length set to the buffer size. The buffer is owned by the * lto_code_gen_t and will be freed when lto_codegen_dispose() * is called, or lto_codegen_compile() is called again. * On failure, returns NULL (check lto_get_error_message() for details). @@ -285,9 +296,13 @@ lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name); */ extern void lto_codegen_debug_options(lto_code_gen_t cg, const char *); + #ifdef __cplusplus } #endif +/** + * @} + */ #endif diff --git a/contrib/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm/include/llvm/ADT/APFloat.h index d2566a44bac9..2b466f900c81 100644 --- a/contrib/llvm/include/llvm/ADT/APFloat.h +++ b/contrib/llvm/include/llvm/ADT/APFloat.h @@ -320,6 +320,7 @@ namespace llvm { const fltSemantics &getSemantics() const { return *semantics; } bool isZero() const { return category == fcZero; } bool isNonZero() const { return category != fcZero; } + bool isNormal() const { return category == fcNormal; } bool isNaN() const { return category == fcNaN; } bool isInfinity() const { return category == fcInfinity; } bool isNegative() const { return sign; } @@ -328,8 +329,16 @@ namespace llvm { APFloat& operator=(const APFloat &); - /* Return an arbitrary integer value usable for hashing. */ - uint32_t getHashValue() const; + /// \brief Overload to compute a hash code for an APFloat value. + /// + /// Note that the use of hash codes for floating point values is in general + /// frought with peril. Equality is hard to define for these values. For + /// example, should negative and positive zero hash to different codes? Are + /// they equal or not? This hash value implementation specifically + /// emphasizes producing different codes for different inputs in order to + /// be used in canonicalization and memoization. As such, equality is + /// bitwiseIsEqual, and 0 != -0. + friend hash_code hash_value(const APFloat &Arg); /// Converts this value into a decimal string. /// diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h index 707e0dbb6b91..41019899766b 100644 --- a/contrib/llvm/include/llvm/ADT/APInt.h +++ b/contrib/llvm/include/llvm/ADT/APInt.h @@ -23,11 +23,12 @@ #include namespace llvm { - class Serializer; class Deserializer; class FoldingSetNodeID; - class raw_ostream; + class Serializer; class StringRef; + class hash_code; + class raw_ostream; template class SmallVectorImpl; @@ -497,15 +498,13 @@ class APInt { if (loBitsSet == APINT_BITS_PER_WORD) return APInt(numBits, -1ULL); // For small values, return quickly. - if (numBits < APINT_BITS_PER_WORD) - return APInt(numBits, (1ULL << loBitsSet) - 1); + if (loBitsSet <= APINT_BITS_PER_WORD) + return APInt(numBits, -1ULL >> (APINT_BITS_PER_WORD - loBitsSet)); return getAllOnesValue(numBits).lshr(numBits - loBitsSet); } - /// The hash value is computed as the sum of the words and the bit width. - /// @returns A hash value computed from the sum of the APInt words. - /// @brief Get a hash value based on this APInt - uint64_t getHashValue() const; + /// \brief Overload to compute a hash_code for an APInt value. + friend hash_code hash_value(const APInt &Arg); /// This function returns a pointer to the internal storage of the APInt. /// This is useful for writing out the APInt in binary form without any @@ -562,7 +561,15 @@ class APInt { /// Performs logical negation operation on this APInt. /// @returns true if *this is zero, false otherwise. 
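The APInt::getLowBitsSet change above widens the single-word fast path (it now keys on loBitsSet rather than numBits) and builds the mask by shifting all-ones down instead of shifting a one up. A standalone sketch, not LLVM code, of why the shifted-down form needs no special case at the word size:

#include <cassert>
#include <cstdint>

// Mask of the n lowest bits of a 64-bit word, valid for 1 <= n <= 64.
// "(1ULL << n) - 1" is undefined behaviour at n == 64, whereas shifting
// all-ones right by (64 - n) stays defined over the whole range.
static inline uint64_t lowBitsMask(unsigned n) {
  assert(n >= 1 && n <= 64 && "bit count out of range");
  return ~0ULL >> (64 - n);
}
// lowBitsMask(1) == 0x1, lowBitsMask(32) == 0xFFFFFFFF, lowBitsMask(64) == ~0ULL
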
/// @brief Logical negation operator. - bool operator!() const; + bool operator!() const { + if (isSingleWord()) + return !VAL; + + for (unsigned i = 0; i != getNumWords(); ++i) + if (pVal[i]) + return false; + return true; + } /// @} /// @name Assignment Operators @@ -835,7 +842,11 @@ class APInt { /// @returns the bit value at bitPosition /// @brief Array-indexing support. - bool operator[](unsigned bitPosition) const; + bool operator[](unsigned bitPosition) const { + assert(bitPosition < getBitWidth() && "Bit position out of bounds!"); + return (maskBit(bitPosition) & + (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0; + } /// @} /// @name Comparison Operators @@ -1056,6 +1067,16 @@ class APInt { /// @brief Zero extend or truncate to width APInt zextOrTrunc(unsigned width) const; + /// Make this APInt have the bit width given by \p width. The value is sign + /// extended, or left alone to make it that width. + /// @brief Sign extend or truncate to width + APInt sextOrSelf(unsigned width) const; + + /// Make this APInt have the bit width given by \p width. The value is zero + /// extended, or left alone to make it that width. + /// @brief Zero extend or truncate to width + APInt zextOrSelf(unsigned width) const; + /// @} /// @name Bit Manipulation Operators /// @{ diff --git a/contrib/llvm/include/llvm/ADT/ArrayRef.h b/contrib/llvm/include/llvm/ADT/ArrayRef.h index 33a8c651b23a..f4c8e5586213 100644 --- a/contrib/llvm/include/llvm/ADT/ArrayRef.h +++ b/contrib/llvm/include/llvm/ADT/ArrayRef.h @@ -14,8 +14,7 @@ #include namespace llvm { - class APInt; - + /// ArrayRef - Represent a constant reference to an array (0 or more elements /// consecutively in memory), i.e. a start pointer and a length. It allows /// various APIs to take consecutive elements easily and conveniently. @@ -33,33 +32,33 @@ namespace llvm { typedef const T *iterator; typedef const T *const_iterator; typedef size_t size_type; - + private: /// The start of the array, in an external buffer. const T *Data; - + /// The number of elements. size_type Length; - + public: /// @name Constructors /// @{ - + /// Construct an empty ArrayRef. /*implicit*/ ArrayRef() : Data(0), Length(0) {} - + /// Construct an ArrayRef from a single element. /*implicit*/ ArrayRef(const T &OneElt) : Data(&OneElt), Length(1) {} - + /// Construct an ArrayRef from a pointer and length. /*implicit*/ ArrayRef(const T *data, size_t length) : Data(data), Length(length) {} - + /// Construct an ArrayRef from a range. ArrayRef(const T *begin, const T *end) : Data(begin), Length(end - begin) {} - + /// Construct an ArrayRef from a SmallVector. /*implicit*/ ArrayRef(const SmallVectorImpl &Vec) : Data(Vec.data()), Length(Vec.size()) {} @@ -67,39 +66,39 @@ namespace llvm { /// Construct an ArrayRef from a std::vector. /*implicit*/ ArrayRef(const std::vector &Vec) : Data(Vec.empty() ? (T*)0 : &Vec[0]), Length(Vec.size()) {} - + /// Construct an ArrayRef from a C array. template /*implicit*/ ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {} - + /// @} /// @name Simple Operations /// @{ iterator begin() const { return Data; } iterator end() const { return Data + Length; } - + /// empty - Check if the array is empty. bool empty() const { return Length == 0; } - + const T *data() const { return Data; } - + /// size - Get the array size. size_t size() const { return Length; } - + /// front - Get the first element. const T &front() const { assert(!empty()); return Data[0]; } - + /// back - Get the last element. 
const T &back() const { assert(!empty()); return Data[Length-1]; } - + /// equals - Check for element-wise equality. bool equals(ArrayRef RHS) const { if (Length != RHS.Length) @@ -111,18 +110,18 @@ namespace llvm { } /// slice(n) - Chop off the first N elements of the array. - ArrayRef slice(unsigned N) { + ArrayRef slice(unsigned N) const { assert(N <= size() && "Invalid specifier"); return ArrayRef(data()+N, size()-N); } /// slice(n, m) - Chop off the first N elements of the array, and keep M /// elements in the array. - ArrayRef slice(unsigned N, unsigned M) { + ArrayRef slice(unsigned N, unsigned M) const { assert(N+M <= size() && "Invalid specifier"); return ArrayRef(data()+N, M); } - + /// @} /// @name Operator Overloads /// @{ @@ -130,22 +129,104 @@ namespace llvm { assert(Index < Length && "Invalid index!"); return Data[Index]; } - + /// @} /// @name Expensive Operations /// @{ std::vector vec() const { return std::vector(Data, Data+Length); } - + /// @} /// @name Conversion operators /// @{ operator std::vector() const { return std::vector(Data, Data+Length); } + + /// @} + }; + + /// MutableArrayRef - Represent a mutable reference to an array (0 or more + /// elements consecutively in memory), i.e. a start pointer and a length. It + /// allows various APIs to take and modify consecutive elements easily and + /// conveniently. + /// + /// This class does not own the underlying data, it is expected to be used in + /// situations where the data resides in some other buffer, whose lifetime + /// extends past that of the MutableArrayRef. For this reason, it is not in + /// general safe to store a MutableArrayRef. + /// + /// This is intended to be trivially copyable, so it should be passed by + /// value. + template + class MutableArrayRef : public ArrayRef { + public: + typedef T *iterator; + + /// Construct an empty ArrayRef. + /*implicit*/ MutableArrayRef() : ArrayRef() {} + + /// Construct an MutableArrayRef from a single element. + /*implicit*/ MutableArrayRef(T &OneElt) : ArrayRef(OneElt) {} + + /// Construct an MutableArrayRef from a pointer and length. + /*implicit*/ MutableArrayRef(T *data, size_t length) + : ArrayRef(data, length) {} + + /// Construct an MutableArrayRef from a range. + MutableArrayRef(T *begin, T *end) : ArrayRef(begin, end) {} + + /// Construct an MutableArrayRef from a SmallVector. + /*implicit*/ MutableArrayRef(SmallVectorImpl &Vec) + : ArrayRef(Vec) {} + + /// Construct a MutableArrayRef from a std::vector. + /*implicit*/ MutableArrayRef(std::vector &Vec) + : ArrayRef(Vec) {} + + /// Construct an MutableArrayRef from a C array. + template + /*implicit*/ MutableArrayRef(T (&Arr)[N]) + : ArrayRef(Arr) {} + + T *data() const { return const_cast(ArrayRef::data()); } + + iterator begin() const { return data(); } + iterator end() const { return data() + this->size(); } + + /// front - Get the first element. + T &front() const { + assert(!this->empty()); + return data()[0]; + } + + /// back - Get the last element. + T &back() const { + assert(!this->empty()); + return data()[this->size()-1]; + } + + /// slice(n) - Chop off the first N elements of the array. + MutableArrayRef slice(unsigned N) const { + assert(N <= this->size() && "Invalid specifier"); + return MutableArrayRef(data()+N, this->size()-N); + } + + /// slice(n, m) - Chop off the first N elements of the array, and keep M + /// elements in the array. 
+ MutableArrayRef slice(unsigned N, unsigned M) const { + assert(N+M <= this->size() && "Invalid specifier"); + return MutableArrayRef(data()+N, M); + } /// @} + /// @name Operator Overloads + /// @{ + T &operator[](size_t Index) const { + assert(Index < this->size() && "Invalid index!"); + return data()[Index]; + } }; /// @name ArrayRef Convenience constructors @@ -215,5 +296,5 @@ namespace llvm { static const bool value = true; }; } - + #endif diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h index ac1cf0c79a8f..7e0b5ba37196 100644 --- a/contrib/llvm/include/llvm/ADT/BitVector.h +++ b/contrib/llvm/include/llvm/ADT/BitVector.h @@ -14,12 +14,12 @@ #ifndef LLVM_ADT_BITVECTOR_H #define LLVM_ADT_BITVECTOR_H +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include #include #include #include -#include namespace llvm { @@ -116,7 +116,7 @@ class BitVector { else if (sizeof(BitWord) == 8) NumBits += CountPopulation_64(Bits[i]); else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); return NumBits; } @@ -146,10 +146,9 @@ class BitVector { if (Bits[i] != 0) { if (sizeof(BitWord) == 4) return i * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Bits[i]); - else if (sizeof(BitWord) == 8) + if (sizeof(BitWord) == 8) return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]); - else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } return -1; } @@ -170,10 +169,9 @@ class BitVector { if (Copy != 0) { if (sizeof(BitWord) == 4) return WordPos * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Copy); - else if (sizeof(BitWord) == 8) + if (sizeof(BitWord) == 8) return WordPos * BITWORD_SIZE + CountTrailingZeros_64(Copy); - else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } // Check subsequent words. @@ -181,10 +179,9 @@ class BitVector { if (Bits[i] != 0) { if (sizeof(BitWord) == 4) return i * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Bits[i]); - else if (sizeof(BitWord) == 8) + if (sizeof(BitWord) == 8) return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]); - else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } return -1; } @@ -318,6 +315,16 @@ class BitVector { return *this; } + // reset - Reset bits that are set in RHS. Same as *this &= ~RHS. + BitVector &reset(const BitVector &RHS) { + unsigned ThisWords = NumBitWords(size()); + unsigned RHSWords = NumBitWords(RHS.size()); + unsigned i; + for (i = 0; i != std::min(ThisWords, RHSWords); ++i) + Bits[i] &= ~RHS.Bits[i]; + return *this; + } + BitVector &operator|=(const BitVector &RHS) { if (size() < RHS.size()) resize(RHS.size()); @@ -365,6 +372,42 @@ class BitVector { std::swap(Capacity, RHS.Capacity); } + //===--------------------------------------------------------------------===// + // Portable bit mask operations. + //===--------------------------------------------------------------------===// + // + // These methods all operate on arrays of uint32_t, each holding 32 bits. The + // fixed word size makes it easier to work with literal bit vector constants + // in portable code. + // + // The LSB in each word is the lowest numbered bit. The size of a portable + // bit mask is always a whole multiple of 32 bits. If no bit mask size is + // given, the bit mask is assumed to cover the entire BitVector. + + /// setBitsInMask - Add '1' bits from Mask to this vector. Don't resize. + /// This computes "*this |= Mask". 
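MutableArrayRef, added above, is the writable counterpart of ArrayRef: a non-owning pointer-plus-length view whose elements may be modified through it. A short usage sketch with a hypothetical helper:

#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

// Hypothetical helper: zero everything after the first KeepFirst elements of
// a caller-owned buffer, without copying or taking ownership.
static void zeroTail(MutableArrayRef<int> Buf, unsigned KeepFirst) {
  MutableArrayRef<int> Tail = Buf.slice(KeepFirst);
  for (unsigned i = 0, e = Tail.size(); i != e; ++i)
    Tail[i] = 0;                 // operator[] yields a mutable reference
}

void zeroTailDemo() {
  int Storage[4] = {1, 2, 3, 4};
  zeroTail(Storage, 2);          // implicit MutableArrayRef from a C array
  // Storage is now {1, 2, 0, 0}; the view never owned the memory.
}
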
+ void setBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + applyMask(Mask, MaskWords); + } + + /// clearBitsInMask - Clear any bits in this vector that are set in Mask. + /// Don't resize. This computes "*this &= ~Mask". + void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + applyMask(Mask, MaskWords); + } + + /// setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask. + /// Don't resize. This computes "*this |= ~Mask". + void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + applyMask(Mask, MaskWords); + } + + /// clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask. + /// Don't resize. This computes "*this &= Mask". + void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + applyMask(Mask, MaskWords); + } + private: unsigned NumBitWords(unsigned S) const { return (S + BITWORD_SIZE-1) / BITWORD_SIZE; @@ -400,6 +443,33 @@ class BitVector { void init_words(BitWord *B, unsigned NumWords, bool t) { memset(B, 0 - (int)t, NumWords*sizeof(BitWord)); } + + template + void applyMask(const uint32_t *Mask, unsigned MaskWords) { + assert(BITWORD_SIZE % 32 == 0 && "Unsupported BitWord size."); + MaskWords = std::min(MaskWords, (size() + 31) / 32); + const unsigned Scale = BITWORD_SIZE / 32; + unsigned i; + for (i = 0; MaskWords >= Scale; ++i, MaskWords -= Scale) { + BitWord BW = Bits[i]; + // This inner loop should unroll completely when BITWORD_SIZE > 32. + for (unsigned b = 0; b != BITWORD_SIZE; b += 32) { + uint32_t M = *Mask++; + if (InvertMask) M = ~M; + if (AddBits) BW |= BitWord(M) << b; + else BW &= ~(BitWord(M) << b); + } + Bits[i] = BW; + } + for (unsigned b = 0; MaskWords; b += 32, --MaskWords) { + uint32_t M = *Mask++; + if (InvertMask) M = ~M; + if (AddBits) Bits[i] |= BitWord(M) << b; + else Bits[i] &= ~(BitWord(M) << b); + } + if (AddBits) + clear_unused_bits(); + } }; inline BitVector operator&(const BitVector &LHS, const BitVector &RHS) { diff --git a/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h b/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h index 99ed15c0d60f..e502ac4348d0 100644 --- a/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h +++ b/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h @@ -36,6 +36,7 @@ namespace llvm { /// for more information on the properties which the predicate function itself /// should satisfy. class DAGDeltaAlgorithm { + virtual void anchor(); public: typedef unsigned change_ty; typedef std::pair edge_ty; diff --git a/contrib/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm/include/llvm/ADT/DenseMap.h index e70cacf3ca5b..8d4a19d0919c 100644 --- a/contrib/llvm/include/llvm/ADT/DenseMap.h +++ b/contrib/llvm/include/llvm/ADT/DenseMap.h @@ -30,12 +30,11 @@ namespace llvm { template, - typename ValueInfoT = DenseMapInfo, bool IsConst = false> + bool IsConst = false> class DenseMapIterator; template, - typename ValueInfoT = DenseMapInfo > + typename KeyInfoT = DenseMapInfo > class DenseMap { typedef std::pair BucketT; unsigned NumBuckets; @@ -80,19 +79,19 @@ class DenseMap { typedef DenseMapIterator iterator; typedef DenseMapIterator const_iterator; + KeyInfoT, true> const_iterator; inline iterator begin() { // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets(). return empty() ? end() : iterator(Buckets, Buckets+NumBuckets); } inline iterator end() { - return iterator(Buckets+NumBuckets, Buckets+NumBuckets); + return iterator(Buckets+NumBuckets, Buckets+NumBuckets, true); } inline const_iterator begin() const { return empty() ? 
end() : const_iterator(Buckets, Buckets+NumBuckets); } inline const_iterator end() const { - return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets); + return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets, true); } bool empty() const { return NumEntries == 0; } @@ -137,13 +136,33 @@ class DenseMap { iterator find(const KeyT &Val) { BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) - return iterator(TheBucket, Buckets+NumBuckets); + return iterator(TheBucket, Buckets+NumBuckets, true); return end(); } const_iterator find(const KeyT &Val) const { BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) - return const_iterator(TheBucket, Buckets+NumBuckets); + return const_iterator(TheBucket, Buckets+NumBuckets, true); + return end(); + } + + /// Alternate version of find() which allows a different, and possibly + /// less expensive, key type. + /// The DenseMapInfo is responsible for supplying methods + /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key + /// type used. + template + iterator find_as(const LookupKeyT &Val) { + BucketT *TheBucket; + if (LookupBucketFor(Val, TheBucket)) + return iterator(TheBucket, Buckets+NumBuckets, true); + return end(); + } + template + const_iterator find_as(const LookupKeyT &Val) const { + BucketT *TheBucket; + if (LookupBucketFor(Val, TheBucket)) + return const_iterator(TheBucket, Buckets+NumBuckets, true); return end(); } @@ -162,13 +181,12 @@ class DenseMap { std::pair insert(const std::pair &KV) { BucketT *TheBucket; if (LookupBucketFor(KV.first, TheBucket)) - return std::make_pair(iterator(TheBucket, Buckets+NumBuckets), + return std::make_pair(iterator(TheBucket, Buckets+NumBuckets, true), false); // Already in map. // Otherwise, insert the new element. TheBucket = InsertIntoBucket(KV.first, KV.second, TheBucket); - return std::make_pair(iterator(TheBucket, Buckets+NumBuckets), - true); + return std::make_pair(iterator(TheBucket, Buckets+NumBuckets, true), true); } /// insert - Range insertion of pairs. @@ -237,7 +255,7 @@ class DenseMap { private: void CopyFrom(const DenseMap& other) { if (NumBuckets != 0 && - (!isPodLike::value || !isPodLike::value)) { + (!isPodLike::value || !isPodLike::value)) { const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) { if (!KeyInfoT::isEqual(P->first, EmptyKey) && @@ -266,7 +284,7 @@ class DenseMap { Buckets = static_cast(operator new(sizeof(BucketT) * NumBuckets)); - if (isPodLike::value && isPodLike::value) + if (isPodLike::value && isPodLike::value) memcpy(Buckets, other.Buckets, NumBuckets * sizeof(BucketT)); else for (size_t i = 0; i < NumBuckets; ++i) { @@ -310,6 +328,10 @@ class DenseMap { static unsigned getHashValue(const KeyT &Val) { return KeyInfoT::getHashValue(Val); } + template + static unsigned getHashValue(const LookupKeyT &Val) { + return KeyInfoT::getHashValue(Val); + } static const KeyT getEmptyKey() { return KeyInfoT::getEmptyKey(); } @@ -321,7 +343,8 @@ class DenseMap { /// FoundBucket. If the bucket contains the key and a value, this returns /// true, otherwise it returns a bucket with an empty marker or tombstone and /// returns false. 
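find_as(), added above, lets a DenseMap be probed with a cheaper key type, provided the DenseMapInfo supplies getHashValue and isEqual overloads for that lookup type as well. A hedged sketch; the DenseMapInfo<std::string> specialization and its sentinel keys are inventions for this example and are assumed never to occur as real data:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include <string>

namespace llvm {
template <> struct DenseMapInfo<std::string> {
  static inline std::string getEmptyKey() { return "<<EMPTY>>"; }
  static inline std::string getTombstoneKey() { return "<<TOMBSTONE>>"; }
  static unsigned getHashValue(StringRef V) {
    return (unsigned)hash_combine_range(V.begin(), V.end());
  }
  static unsigned getHashValue(const std::string &V) {
    return getHashValue(StringRef(V));  // keep both key forms consistent
  }
  static bool isEqual(const std::string &L, const std::string &R) {
    return L == R;
  }
  static bool isEqual(StringRef L, const std::string &R) {
    return L.equals(R);
  }
};
} // namespace llvm

void lookupDemo() {
  llvm::DenseMap<std::string, int> M;
  M[std::string("answer")] = 42;
  // Heterogeneous lookup: the StringRef never becomes a std::string.
  llvm::DenseMap<std::string, int>::iterator I =
      M.find_as(llvm::StringRef("answer"));
  if (I != M.end())
    (void)I->second;                    // 42
}
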
- bool LookupBucketFor(const KeyT &Val, BucketT *&FoundBucket) const { + template + bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) const { unsigned BucketNo = getHashValue(Val); unsigned ProbeAmt = 1; BucketT *BucketsPtr = Buckets; @@ -342,7 +365,7 @@ class DenseMap { while (1) { BucketT *ThisBucket = BucketsPtr + (BucketNo & (NumBuckets-1)); // Found Val's bucket? If so, return it. - if (KeyInfoT::isEqual(ThisBucket->first, Val)) { + if (KeyInfoT::isEqual(Val, ThisBucket->first)) { FoundBucket = ThisBucket; return true; } @@ -478,12 +501,12 @@ class DenseMap { }; template + typename KeyInfoT, bool IsConst> class DenseMapIterator { typedef std::pair Bucket; typedef DenseMapIterator ConstIterator; - friend class DenseMapIterator; + KeyInfoT, true> ConstIterator; + friend class DenseMapIterator; public: typedef ptrdiff_t difference_type; typedef typename conditional::type value_type; @@ -495,15 +518,16 @@ class DenseMapIterator { public: DenseMapIterator() : Ptr(0), End(0) {} - DenseMapIterator(pointer Pos, pointer E) : Ptr(Pos), End(E) { - AdvancePastEmptyBuckets(); + DenseMapIterator(pointer Pos, pointer E, bool NoAdvance = false) + : Ptr(Pos), End(E) { + if (!NoAdvance) AdvancePastEmptyBuckets(); } // If IsConst is true this is a converting constructor from iterator to // const_iterator and the default copy constructor is used. // Otherwise this is a copy constructor for iterator. DenseMapIterator(const DenseMapIterator& I) + KeyInfoT, false>& I) : Ptr(I.Ptr), End(I.End) {} reference operator*() const { @@ -541,9 +565,9 @@ class DenseMapIterator { } }; -template +template static inline size_t -capacity_in_bytes(const DenseMap &X) { +capacity_in_bytes(const DenseMap &X) { return X.getMemorySize(); } diff --git a/contrib/llvm/include/llvm/ADT/DenseMapInfo.h b/contrib/llvm/include/llvm/ADT/DenseMapInfo.h index df4084e6f411..1559a35c39f9 100644 --- a/contrib/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/contrib/llvm/include/llvm/ADT/DenseMapInfo.h @@ -59,7 +59,7 @@ template<> struct DenseMapInfo { // Provide DenseMapInfo for unsigned ints. template<> struct DenseMapInfo { - static inline unsigned getEmptyKey() { return ~0; } + static inline unsigned getEmptyKey() { return ~0U; } static inline unsigned getTombstoneKey() { return ~0U - 1; } static unsigned getHashValue(const unsigned& Val) { return Val * 37U; } static bool isEqual(const unsigned& LHS, const unsigned& RHS) { diff --git a/contrib/llvm/include/llvm/ADT/FoldingSet.h b/contrib/llvm/include/llvm/ADT/FoldingSet.h index d2e0b8f91b2c..7d7c77770020 100644 --- a/contrib/llvm/include/llvm/ADT/FoldingSet.h +++ b/contrib/llvm/include/llvm/ADT/FoldingSet.h @@ -193,12 +193,11 @@ class FoldingSetImpl { virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0; /// NodeEquals - Instantiations of the FoldingSet template implement /// this function to compare the given node with the given ID. - virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, + virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) const=0; - /// NodeEquals - Instantiations of the FoldingSet template implement + /// ComputeNodeHash - Instantiations of the FoldingSet template implement /// this function to compute a hash value for the given node. 
- virtual unsigned ComputeNodeHash(Node *N, - FoldingSetNodeID &TempID) const = 0; + virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const = 0; }; //===----------------------------------------------------------------------===// @@ -220,7 +219,7 @@ template struct DefaultFoldingSetTrait { // to compute a temporary ID if necessary. The default implementation // just calls Profile and does a regular comparison. Implementations // can override this to provide more efficient implementations. - static inline bool Equals(T &X, const FoldingSetNodeID &ID, + static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID); // ComputeHash - Compute a hash value for X, using TempID to @@ -249,7 +248,7 @@ struct DefaultContextualFoldingSetTrait { static void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) { X.Profile(ID, Context); } - static inline bool Equals(T &X, const FoldingSetNodeID &ID, + static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID, Ctx Context); static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID, Ctx Context); @@ -344,7 +343,7 @@ template class FoldingSetBucketIterator; template inline bool DefaultFoldingSetTrait::Equals(T &X, const FoldingSetNodeID &ID, - FoldingSetNodeID &TempID) { + unsigned IDHash, FoldingSetNodeID &TempID) { FoldingSetTrait::Profile(X, TempID); return TempID == ID; } @@ -358,6 +357,7 @@ template inline bool DefaultContextualFoldingSetTrait::Equals(T &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID, Ctx Context) { ContextualFoldingSetTrait::Profile(X, TempID, Context); @@ -387,15 +387,14 @@ template class FoldingSet : public FoldingSetImpl { } /// NodeEquals - Instantiations may optionally provide a way to compare a /// node with a specified ID. - virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, + virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) const { T *TN = static_cast(N); - return FoldingSetTrait::Equals(*TN, ID, TempID); + return FoldingSetTrait::Equals(*TN, ID, IDHash, TempID); } - /// NodeEquals - Instantiations may optionally provide a way to compute a + /// ComputeNodeHash - Instantiations may optionally provide a way to compute a /// hash value directly from a node. 
- virtual unsigned ComputeNodeHash(Node *N, - FoldingSetNodeID &TempID) const { + virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const { T *TN = static_cast(N); return FoldingSetTrait::ComputeHash(*TN, TempID); } @@ -465,10 +464,11 @@ class ContextualFoldingSet : public FoldingSetImpl { ContextualFoldingSetTrait::Profile(*TN, ID, Context); } virtual bool NodeEquals(FoldingSetImpl::Node *N, - const FoldingSetNodeID &ID, + const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) const { T *TN = static_cast(N); - return ContextualFoldingSetTrait::Equals(*TN, ID, TempID, Context); + return ContextualFoldingSetTrait::Equals(*TN, ID, IDHash, TempID, + Context); } virtual unsigned ComputeNodeHash(FoldingSetImpl::Node *N, FoldingSetNodeID &TempID) const { diff --git a/contrib/llvm/include/llvm/ADT/GraphTraits.h b/contrib/llvm/include/llvm/ADT/GraphTraits.h index 0fd1f5022af7..823caef7647e 100644 --- a/contrib/llvm/include/llvm/ADT/GraphTraits.h +++ b/contrib/llvm/include/llvm/ADT/GraphTraits.h @@ -43,9 +43,12 @@ struct GraphTraits { // typedef ...iterator nodes_iterator; // static nodes_iterator nodes_begin(GraphType *G) // static nodes_iterator nodes_end (GraphType *G) - // // nodes_iterator/begin/end - Allow iteration over all nodes in the graph + // static unsigned size (GraphType *G) + // Return total number of nodes in the graph + // + // If anyone tries to use this class without having an appropriate // specialization, make an error. If you get this error, it's because you diff --git a/contrib/llvm/include/llvm/ADT/Hashing.h b/contrib/llvm/include/llvm/ADT/Hashing.h new file mode 100644 index 000000000000..53032ee538d2 --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/Hashing.h @@ -0,0 +1,770 @@ +//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the newly proposed standard C++ interfaces for hashing +// arbitrary data and building hash functions for user-defined types. This +// interface was originally proposed in N3333[1] and is currently under review +// for inclusion in a future TR and/or standard. +// +// The primary interfaces provide are comprised of one type and three functions: +// +// -- 'hash_code' class is an opaque type representing the hash code for some +// data. It is the intended product of hashing, and can be used to implement +// hash tables, checksumming, and other common uses of hashes. It is not an +// integer type (although it can be converted to one) because it is risky +// to assume much about the internals of a hash_code. In particular, each +// execution of the program has a high probability of producing a different +// hash_code for a given input. Thus their values are not stable to save or +// persist, and should only be used during the execution for the +// construction of hashing datastructures. +// +// -- 'hash_value' is a function designed to be overloaded for each +// user-defined type which wishes to be used within a hashing context. It +// should be overloaded within the user-defined type's namespace and found +// via ADL. Overloads for primitive types are provided by this library. 
+// +// -- 'hash_combine' and 'hash_combine_range' are functions designed to aid +// programmers in easily and intuitively combining a set of data into +// a single hash_code for their object. They should only logically be used +// within the implementation of a 'hash_value' routine or similar context. +// +// Note that 'hash_combine_range' contains very special logic for hashing +// a contiguous array of integers or pointers. This logic is *extremely* fast, +// on a modern Intel "Gainestown" Xeon (Nehalem uarch) @2.2 GHz, these were +// benchmarked at over 6.5 GiB/s for large keys, and <20 cycles/hash for keys +// under 32-bytes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_HASHING_H +#define LLVM_ADT_HASHING_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/type_traits.h" +#include +#include +#include +#include +#include + +// Allow detecting C++11 feature availability when building with Clang without +// breaking other compilers. +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +namespace llvm { + +/// \brief An opaque object representing a hash code. +/// +/// This object represents the result of hashing some entity. It is intended to +/// be used to implement hashtables or other hashing-based data structures. +/// While it wraps and exposes a numeric value, this value should not be +/// trusted to be stable or predictable across processes or executions. +/// +/// In order to obtain the hash_code for an object 'x': +/// \code +/// using llvm::hash_value; +/// llvm::hash_code code = hash_value(x); +/// \endcode +/// +/// Also note that there are two numerical values which are reserved, and the +/// implementation ensures will never be produced for real hash_codes. These +/// can be used as sentinels within hashing data structures. +class hash_code { + size_t value; + +public: + /// \brief Default construct a hash_code. + /// Note that this leaves the value uninitialized. + hash_code() {} + + /// \brief Form a hash code directly from a numerical value. + hash_code(size_t value) : value(value) {} + + /// \brief Convert the hash code to its numerical value for use. + /*explicit*/ operator size_t() const { return value; } + + friend bool operator==(const hash_code &lhs, const hash_code &rhs) { + return lhs.value == rhs.value; + } + friend bool operator!=(const hash_code &lhs, const hash_code &rhs) { + return lhs.value != rhs.value; + } + + /// \brief Allow a hash_code to be directly run through hash_value. + friend size_t hash_value(const hash_code &code) { return code.value; } +}; + +/// \brief Compute a hash_code for any integer value. +/// +/// Note that this function is intended to compute the same hash_code for +/// a particular value without regard to the pre-promotion type. This is in +/// contrast to hash_combine which may produce different hash_codes for +/// differing argument types even if they would implicit promote to a common +/// type without changing the value. +template +typename enable_if, hash_code>::type hash_value(T value); + +/// \brief Compute a hash_code for a pointer's address. +/// +/// N.B.: This hashes the *address*. Not the value and not the type. +template hash_code hash_value(const T *ptr); + +/// \brief Compute a hash_code for a pair of objects. +template +hash_code hash_value(const std::pair &arg); + +/// \brief Compute a hash_code for a standard string. 
+template +hash_code hash_value(const std::basic_string &arg); + + +/// \brief Override the execution seed with a fixed value. +/// +/// This hashing library uses a per-execution seed designed to change on each +/// run with high probability in order to ensure that the hash codes are not +/// attackable and to ensure that output which is intended to be stable does +/// not rely on the particulars of the hash codes produced. +/// +/// That said, there are use cases where it is important to be able to +/// reproduce *exactly* a specific behavior. To that end, we provide a function +/// which will forcibly set the seed to a fixed value. This must be done at the +/// start of the program, before any hashes are computed. Also, it cannot be +/// undone. This makes it thread-hostile and very hard to use outside of +/// immediately on start of a simple program designed for reproducible +/// behavior. +void set_fixed_execution_hash_seed(size_t fixed_value); + + +// All of the implementation details of actually computing the various hash +// code values are held within this namespace. These routines are included in +// the header file mainly to allow inlining and constant propagation. +namespace hashing { +namespace detail { + +inline uint64_t fetch64(const char *p) { + uint64_t result; + memcpy(&result, p, sizeof(result)); + if (sys::isBigEndianHost()) + return sys::SwapByteOrder(result); + return result; +} + +inline uint32_t fetch32(const char *p) { + uint32_t result; + memcpy(&result, p, sizeof(result)); + if (sys::isBigEndianHost()) + return sys::SwapByteOrder(result); + return result; +} + +/// Some primes between 2^63 and 2^64 for various uses. +static const uint64_t k0 = 0xc3a5c85c97cb3127ULL; +static const uint64_t k1 = 0xb492b66fbe98f273ULL; +static const uint64_t k2 = 0x9ae16a3b2f90404fULL; +static const uint64_t k3 = 0xc949d7c7509e6557ULL; + +/// \brief Bitwise right rotate. +/// Normally this will compile to a single instruction, especially if the +/// shift is a manifest constant. +inline uint64_t rotate(uint64_t val, size_t shift) { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +inline uint64_t shift_mix(uint64_t val) { + return val ^ (val >> 47); +} + +inline uint64_t hash_16_bytes(uint64_t low, uint64_t high) { + // Murmur-inspired hashing. 
+ const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (low ^ high) * kMul; + a ^= (a >> 47); + uint64_t b = (high ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) { + uint8_t a = s[0]; + uint8_t b = s[len >> 1]; + uint8_t c = s[len - 1]; + uint32_t y = static_cast(a) + (static_cast(b) << 8); + uint32_t z = len + (static_cast(c) << 2); + return shift_mix(y * k2 ^ z * k3 ^ seed) * k2; +} + +inline uint64_t hash_4to8_bytes(const char *s, size_t len, uint64_t seed) { + uint64_t a = fetch32(s); + return hash_16_bytes(len + (a << 3), seed ^ fetch32(s + len - 4)); +} + +inline uint64_t hash_9to16_bytes(const char *s, size_t len, uint64_t seed) { + uint64_t a = fetch64(s); + uint64_t b = fetch64(s + len - 8); + return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b; +} + +inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) { + uint64_t a = fetch64(s) * k1; + uint64_t b = fetch64(s + 8); + uint64_t c = fetch64(s + len - 8) * k2; + uint64_t d = fetch64(s + len - 16) * k0; + return hash_16_bytes(rotate(a - b, 43) + rotate(c ^ seed, 30) + d, + a + rotate(b ^ k3, 20) - c + len + seed); +} + +inline uint64_t hash_33to64_bytes(const char *s, size_t len, uint64_t seed) { + uint64_t z = fetch64(s + 24); + uint64_t a = fetch64(s) + (len + fetch64(s + len - 16)) * k0; + uint64_t b = rotate(a + z, 52); + uint64_t c = rotate(a, 37); + a += fetch64(s + 8); + c += rotate(a, 7); + a += fetch64(s + 16); + uint64_t vf = a + z; + uint64_t vs = b + rotate(a, 31) + c; + a = fetch64(s + 16) + fetch64(s + len - 32); + z = fetch64(s + len - 8); + b = rotate(a + z, 52); + c = rotate(a, 37); + a += fetch64(s + len - 24); + c += rotate(a, 7); + a += fetch64(s + len - 16); + uint64_t wf = a + z; + uint64_t ws = b + rotate(a, 31) + c; + uint64_t r = shift_mix((vf + ws) * k2 + (wf + vs) * k0); + return shift_mix((seed ^ (r * k0)) + vs) * k2; +} + +inline uint64_t hash_short(const char *s, size_t length, uint64_t seed) { + if (length >= 4 && length <= 8) + return hash_4to8_bytes(s, length, seed); + if (length > 8 && length <= 16) + return hash_9to16_bytes(s, length, seed); + if (length > 16 && length <= 32) + return hash_17to32_bytes(s, length, seed); + if (length > 32) + return hash_33to64_bytes(s, length, seed); + if (length != 0) + return hash_1to3_bytes(s, length, seed); + + return k2 ^ seed; +} + +/// \brief The intermediate state used during hashing. +/// Currently, the algorithm for computing hash codes is based on CityHash and +/// keeps 56 bytes of arbitrary state. +struct hash_state { + uint64_t h0, h1, h2, h3, h4, h5, h6; + uint64_t seed; + + /// \brief Create a new hash_state structure and initialize it based on the + /// seed and the first 64-byte chunk. + /// This effectively performs the initial mix. + static hash_state create(const char *s, uint64_t seed) { + hash_state state = { + 0, seed, hash_16_bytes(seed, k1), rotate(seed ^ k1, 49), + seed * k1, shift_mix(seed), 0, seed }; + state.h6 = hash_16_bytes(state.h4, state.h5); + state.mix(s); + return state; + } + + /// \brief Mix 32-bytes from the input sequence into the 16-bytes of 'a' + /// and 'b', including whatever is already in 'a' and 'b'. + static void mix_32_bytes(const char *s, uint64_t &a, uint64_t &b) { + a += fetch64(s); + uint64_t c = fetch64(s + 24); + b = rotate(b + a + c, 21); + uint64_t d = a; + a += fetch64(s + 8) + fetch64(s + 16); + b += rotate(a, 44) + d; + a += c; + } + + /// \brief Mix in a 64-byte buffer of data. 
+ /// We mix all 64 bytes even when the chunk length is smaller, but we + /// record the actual length. + void mix(const char *s) { + h0 = rotate(h0 + h1 + h3 + fetch64(s + 8), 37) * k1; + h1 = rotate(h1 + h4 + fetch64(s + 48), 42) * k1; + h0 ^= h6; + h1 += h3 + fetch64(s + 40); + h2 = rotate(h2 + h5, 33) * k1; + h3 = h4 * k1; + h4 = h0 + h5; + mix_32_bytes(s, h3, h4); + h5 = h2 + h6; + h6 = h1 + fetch64(s + 16); + mix_32_bytes(s + 32, h5, h6); + std::swap(h2, h0); + } + + /// \brief Compute the final 64-bit hash code value based on the current + /// state and the length of bytes hashed. + uint64_t finalize(size_t length) { + return hash_16_bytes(hash_16_bytes(h3, h5) + shift_mix(h1) * k1 + h2, + hash_16_bytes(h4, h6) + shift_mix(length) * k1 + h0); + } +}; + + +/// \brief A global, fixed seed-override variable. +/// +/// This variable can be set using the \see llvm::set_fixed_execution_seed +/// function. See that function for details. Do not, under any circumstances, +/// set or read this variable. +extern size_t fixed_seed_override; + +inline size_t get_execution_seed() { + // FIXME: This needs to be a per-execution seed. This is just a placeholder + // implementation. Switching to a per-execution seed is likely to flush out + // instability bugs and so will happen as its own commit. + // + // However, if there is a fixed seed override set the first time this is + // called, return that instead of the per-execution seed. + const uint64_t seed_prime = 0xff51afd7ed558ccdULL; + static size_t seed = fixed_seed_override ? fixed_seed_override + : (size_t)seed_prime; + return seed; +} + + +/// \brief Trait to indicate whether a type's bits can be hashed directly. +/// +/// A type trait which is true if we want to combine values for hashing by +/// reading the underlying data. It is false if values of this type must +/// first be passed to hash_value, and the resulting hash_codes combined. +// +// FIXME: We want to replace is_integral_or_enum and is_pointer here with +// a predicate which asserts that comparing the underlying storage of two +// values of the type for equality is equivalent to comparing the two values +// for equality. For all the platforms we care about, this holds for integers +// and pointers, but there are platforms where it doesn't and we would like to +// support user-defined types which happen to satisfy this property. +template struct is_hashable_data + : integral_constant::value || + is_pointer::value) && + 64 % sizeof(T) == 0)> {}; + +// Special case std::pair to detect when both types are viable and when there +// is no alignment-derived padding in the pair. This is a bit of a lie because +// std::pair isn't truly POD, but it's close enough in all reasonable +// implementations for our use case of hashing the underlying data. +template struct is_hashable_data > + : integral_constant::value && + is_hashable_data::value && + (sizeof(T) + sizeof(U)) == + sizeof(std::pair))> {}; + +/// \brief Helper to get the hashable data representation for a type. +/// This variant is enabled when the type itself can be used. +template +typename enable_if, T>::type +get_hashable_data(const T &value) { + return value; +} +/// \brief Helper to get the hashable data representation for a type. +/// This variant is enabled when we must first call hash_value and use the +/// result as our data. 
+template +typename enable_if_c::value, size_t>::type +get_hashable_data(const T &value) { + using ::llvm::hash_value; + return hash_value(value); +} + +/// \brief Helper to store data from a value into a buffer and advance the +/// pointer into that buffer. +/// +/// This routine first checks whether there is enough space in the provided +/// buffer, and if not immediately returns false. If there is space, it +/// copies the underlying bytes of value into the buffer, advances the +/// buffer_ptr past the copied bytes, and returns true. +template +bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T& value, + size_t offset = 0) { + size_t store_size = sizeof(value) - offset; + if (buffer_ptr + store_size > buffer_end) + return false; + const char *value_data = reinterpret_cast(&value); + memcpy(buffer_ptr, value_data + offset, store_size); + buffer_ptr += store_size; + return true; +} + +/// \brief Implement the combining of integral values into a hash_code. +/// +/// This overload is selected when the value type of the iterator is +/// integral. Rather than computing a hash_code for each object and then +/// combining them, this (as an optimization) directly combines the integers. +template +hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) { + typedef typename std::iterator_traits::value_type ValueT; + const size_t seed = get_execution_seed(); + char buffer[64], *buffer_ptr = buffer; + char *const buffer_end = buffer_ptr + array_lengthof(buffer); + while (first != last && store_and_advance(buffer_ptr, buffer_end, + get_hashable_data(*first))) + ++first; + if (first == last) + return hash_short(buffer, buffer_ptr - buffer, seed); + assert(buffer_ptr == buffer_end); + + hash_state state = state.create(buffer, seed); + size_t length = 64; + while (first != last) { + // Fill up the buffer. We don't clear it, which re-mixes the last round + // when only a partial 64-byte chunk is left. + buffer_ptr = buffer; + while (first != last && store_and_advance(buffer_ptr, buffer_end, + get_hashable_data(*first))) + ++first; + + // Rotate the buffer if we did a partial fill in order to simulate doing + // a mix of the last 64-bytes. That is how the algorithm works when we + // have a contiguous byte sequence, and we want to emulate that here. + std::rotate(buffer, buffer_ptr, buffer_end); + + // Mix this chunk into the current state. + state.mix(buffer); + length += buffer_ptr - buffer; + }; + + return state.finalize(length); +} + +/// \brief Implement the combining of integral values into a hash_code. +/// +/// This overload is selected when the value type of the iterator is integral +/// and when the input iterator is actually a pointer. Rather than computing +/// a hash_code for each object and then combining them, this (as an +/// optimization) directly combines the integers. Also, because the integers +/// are stored in contiguous memory, this routine avoids copying each value +/// and directly reads from the underlying memory. 
+template +typename enable_if, hash_code>::type +hash_combine_range_impl(ValueT *first, ValueT *last) { + const size_t seed = get_execution_seed(); + const char *s_begin = reinterpret_cast(first); + const char *s_end = reinterpret_cast(last); + const size_t length = std::distance(s_begin, s_end); + if (length <= 64) + return hash_short(s_begin, length, seed); + + const char *s_aligned_end = s_begin + (length & ~63); + hash_state state = state.create(s_begin, seed); + s_begin += 64; + while (s_begin != s_aligned_end) { + state.mix(s_begin); + s_begin += 64; + } + if (length & 63) + state.mix(s_end - 64); + + return state.finalize(length); +} + +} // namespace detail +} // namespace hashing + + +/// \brief Compute a hash_code for a sequence of values. +/// +/// This hashes a sequence of values. It produces the same hash_code as +/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences +/// and is significantly faster given pointers and types which can be hashed as +/// a sequence of bytes. +template +hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) { + return ::llvm::hashing::detail::hash_combine_range_impl(first, last); +} + + +// Implementation details for hash_combine. +namespace hashing { +namespace detail { + +/// \brief Helper class to manage the recursive combining of hash_combine +/// arguments. +/// +/// This class exists to manage the state and various calls involved in the +/// recursive combining of arguments used in hash_combine. It is particularly +/// useful at minimizing the code in the recursive calls to ease the pain +/// caused by a lack of variadic functions. +struct hash_combine_recursive_helper { + char buffer[64]; + hash_state state; + const size_t seed; + +public: + /// \brief Construct a recursive hash combining helper. + /// + /// This sets up the state for a recursive hash combine, including getting + /// the seed and buffer setup. + hash_combine_recursive_helper() + : seed(get_execution_seed()) {} + + /// \brief Combine one chunk of data into the current in-flight hash. + /// + /// This merges one chunk of data into the hash. First it tries to buffer + /// the data. If the buffer is full, it hashes the buffer into its + /// hash_state, empties it, and then merges the new chunk in. This also + /// handles cases where the data straddles the end of the buffer. + template + char *combine_data(size_t &length, char *buffer_ptr, char *buffer_end, T data) { + if (!store_and_advance(buffer_ptr, buffer_end, data)) { + // Check for skew which prevents the buffer from being packed, and do + // a partial store into the buffer to fill it. This is only a concern + // with the variadic combine because that formation can have varying + // argument types. + size_t partial_store_size = buffer_end - buffer_ptr; + memcpy(buffer_ptr, &data, partial_store_size); + + // If the store fails, our buffer is full and ready to hash. We have to + // either initialize the hash state (on the first full buffer) or mix + // this buffer into the existing hash state. Length tracks the *hashed* + // length, not the buffered length. + if (length == 0) { + state = state.create(buffer, seed); + length = 64; + } else { + // Mix this chunk into the current state and bump length up by 64. + state.mix(buffer); + length += 64; + } + // Reset the buffer_ptr to the head of the buffer for the next chunk of + // data. + buffer_ptr = buffer; + + // Try again to store into the buffer -- this cannot fail as we only + // store types smaller than the buffer. 
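For callers, hash_combine_range (declared above) is the usual entry point for hashing a sequence; handed raw pointers over integral data, it takes the byte-reading specialization described above. A small sketch:

#include "llvm/ADT/Hashing.h"

llvm::hash_code hashKey(const unsigned *Words, unsigned Count) {
  // Pointer iterators over integral data hit the fast path that reads the
  // underlying bytes directly.
  return llvm::hash_combine_range(Words, Words + Count);
}

// Usage:
//   unsigned Key[4] = {1, 2, 3, 4};
//   llvm::hash_code C = hashKey(Key, 4);
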
+ if (!store_and_advance(buffer_ptr, buffer_end, data, + partial_store_size)) + abort(); + } + return buffer_ptr; + } + +#if defined(__has_feature) && __has_feature(__cxx_variadic_templates__) + + /// \brief Recursive, variadic combining method. + /// + /// This function recurses through each argument, combining that argument + /// into a single hash. + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T &arg, const Ts &...args) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg)); + + // Recurse to the next argument. + return combine(length, buffer_ptr, buffer_end, args...); + } + +#else + // Manually expanded recursive combining methods. See variadic above for + // documentation. + + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3, + const T4 &arg4, const T5 &arg5, const T6 &arg6) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4, arg5, arg6); + } + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3, + const T4 &arg4, const T5 &arg5) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4, arg5); + } + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3, + const T4 &arg4) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4); + } + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3); + } + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2); + } + template + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end); + } + +#endif + + /// \brief Base case for recursive, variadic combining. + /// + /// The base case when combining arguments recursively is reached when all + /// arguments have been handled. It flushes the remaining buffer and + /// constructs a hash_code. + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end) { + // Check whether the entire set of values fit in the buffer. If so, we'll + // use the optimized short hashing routine and skip state entirely. + if (length == 0) + return hash_short(buffer, buffer_ptr - buffer, seed); + + // Mix the final buffer, rotating it if we did a partial fill in order to + // simulate doing a mix of the last 64-bytes. That is how the algorithm + // works when we have a contiguous byte sequence, and we want to emulate + // that here. + std::rotate(buffer, buffer_ptr, buffer_end); + + // Mix this chunk into the current state. 
+ state.mix(buffer); + length += buffer_ptr - buffer; + + return state.finalize(length); + } +}; + +} // namespace detail +} // namespace hashing + + +#if __has_feature(__cxx_variadic_templates__) + +/// \brief Combine values into a single hash_code. +/// +/// This routine accepts a varying number of arguments of any type. It will +/// attempt to combine them into a single hash_code. For user-defined types it +/// attempts to call a \see hash_value overload (via ADL) for the type. For +/// integer and pointer types it directly combines their data into the +/// resulting hash_code. +/// +/// The result is suitable for returning from a user's hash_value +/// *implementation* for their user-defined type. Consumers of a type should +/// *not* call this routine, they should instead call 'hash_value'. +template hash_code hash_combine(const Ts &...args) { + // Recursively hash each argument using a helper class. + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, args...); +} + +#else + +// What follows are manually exploded overloads for each argument width. See +// the above variadic definition for documentation and specification. + +template +hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, + const T4 &arg4, const T5 &arg5, const T6 &arg6) { + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, + arg1, arg2, arg3, arg4, arg5, arg6); +} +template +hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, + const T4 &arg4, const T5 &arg5) { + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, + arg1, arg2, arg3, arg4, arg5); +} +template +hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, + const T4 &arg4) { + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, + arg1, arg2, arg3, arg4); +} +template +hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3) { + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, arg1, arg2, arg3); +} +template +hash_code hash_combine(const T1 &arg1, const T2 &arg2) { + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, arg1, arg2); +} +template +hash_code hash_combine(const T1 &arg1) { + ::llvm::hashing::detail::hash_combine_recursive_helper helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, arg1); +} + +#endif + + +// Implementation details for implementatinos of hash_value overloads provided +// here. +namespace hashing { +namespace detail { + +/// \brief Helper to hash the value of a single integer. +/// +/// Overloads for smaller integer types are not provided to ensure consistent +/// behavior in the presence of integral promotions. Essentially, +/// "hash_value('4')" and "hash_value('0' + 4)" should be the same. +inline hash_code hash_integer_value(uint64_t value) { + // Similar to hash_4to8_bytes but using a seed instead of length. 
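Putting the public pieces together, a user-defined type opts into this machinery by overloading hash_value in its own namespace and implementing it with hash_combine, as the comments above describe. A hypothetical sketch:

#include "llvm/ADT/Hashing.h"

namespace example {
struct Point {
  int X, Y;
  const char *Label;
};

// Overloaded in the type's namespace so callers find it via ADL, and built on
// hash_combine as the documentation above prescribes.
inline llvm::hash_code hash_value(const Point &P) {
  return llvm::hash_combine(P.X, P.Y, P.Label); // hashes the pointer, not the chars
}
} // namespace example

size_t hashPoint(const example::Point &P) {
  using llvm::hash_value;        // the lookup idiom from the hash_code comment
  return hash_value(P);          // hash_code converts to its numeric value
}
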
+ const uint64_t seed = get_execution_seed(); + const char *s = reinterpret_cast(&value); + const uint64_t a = fetch32(s); + return hash_16_bytes(seed + (a << 3), fetch32(s + 4)); +} + +} // namespace detail +} // namespace hashing + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +typename enable_if, hash_code>::type +hash_value(T value) { + return ::llvm::hashing::detail::hash_integer_value(value); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template hash_code hash_value(const T *ptr) { + return ::llvm::hashing::detail::hash_integer_value( + reinterpret_cast(ptr)); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +hash_code hash_value(const std::pair &arg) { + return hash_combine(arg.first, arg.second); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +hash_code hash_value(const std::basic_string &arg) { + return hash_combine_range(arg.begin(), arg.end()); +} + +} // namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/ADT/ImmutableSet.h b/contrib/llvm/include/llvm/ADT/ImmutableSet.h index d597a7c9be72..89b164819d37 100644 --- a/contrib/llvm/include/llvm/ADT/ImmutableSet.h +++ b/contrib/llvm/include/llvm/ADT/ImmutableSet.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -346,7 +347,7 @@ class ImutAVLTree { if (prev) prev->next = next; else - factory->Cache[computeDigest()] = next; + factory->Cache[factory->maskCacheIndex(computeDigest())] = next; } // We need to clear the mutability bit in case we are @@ -428,6 +429,11 @@ class ImutAVLFactory { TreeTy* getRight(TreeTy* T) const { return T->getRight(); } value_type_ref getValue(TreeTy* T) const { return T->value; } + // Make sure the index is not the Tombstone or Entry key of the DenseMap. + static inline unsigned maskCacheIndex(unsigned I) { + return (I & ~0x02); + } + unsigned incrementHeight(TreeTy* L, TreeTy* R) const { unsigned hl = getHeight(L); unsigned hr = getHeight(R); @@ -610,7 +616,7 @@ class ImutAVLFactory { // Search the hashtable for another tree with the same digest, and // if find a collision compare those trees by their contents. 
unsigned digest = TNew->computeDigest(); - TreeTy *&entry = Cache[digest]; + TreeTy *&entry = Cache[maskCacheIndex(digest)]; do { if (!entry) break; @@ -686,7 +692,7 @@ class ImutAVLTreeGenericIterator { stack.back() |= VisitedRight; break; default: - assert(false && "Unreachable."); + llvm_unreachable("Unreachable."); } } @@ -722,7 +728,7 @@ class ImutAVLTreeGenericIterator { skipToParent(); break; default: - assert(false && "Unreachable."); + llvm_unreachable("Unreachable."); } return *this; } @@ -747,7 +753,7 @@ class ImutAVLTreeGenericIterator { stack.push_back(reinterpret_cast(R) | VisitedRight); break; default: - assert(false && "Unreachable."); + llvm_unreachable("Unreachable."); } return *this; } diff --git a/contrib/llvm/include/llvm/ADT/IntervalMap.h b/contrib/llvm/include/llvm/ADT/IntervalMap.h index 1230e8f5fb43..931b67e40911 100644 --- a/contrib/llvm/include/llvm/ADT/IntervalMap.h +++ b/contrib/llvm/include/llvm/ADT/IntervalMap.h @@ -739,7 +739,7 @@ class BranchNode : public NodeBase { // A Path is used by iterators to represent a position in a B+-tree, and the // path to get there from the root. // -// The Path class also constains the tree navigation code that doesn't have to +// The Path class also contains the tree navigation code that doesn't have to // be templatized. // //===----------------------------------------------------------------------===// @@ -1977,7 +1977,7 @@ iterator::overflow(unsigned Level) { CurSize[Nodes] = CurSize[NewNode]; Node[Nodes] = Node[NewNode]; CurSize[NewNode] = 0; - Node[NewNode] = this->map->newNode(); + Node[NewNode] = this->map->template newNode(); ++Nodes; } diff --git a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h index 2f6fd2bd5590..3a1a3f4634cf 100644 --- a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -46,6 +46,7 @@ namespace llvm { public: RefCountedBase() : ref_cnt(0) {} + RefCountedBase(const RefCountedBase &) : ref_cnt(0) {} void Retain() const { ++ref_cnt; } void Release() const { @@ -64,9 +65,12 @@ namespace llvm { //===----------------------------------------------------------------------===// class RefCountedBaseVPTR { mutable unsigned ref_cnt; + virtual void anchor(); protected: RefCountedBaseVPTR() : ref_cnt(0) {} + RefCountedBaseVPTR(const RefCountedBaseVPTR &) : ref_cnt(0) {} + virtual ~RefCountedBaseVPTR() {} void Retain() const { ++ref_cnt; } @@ -76,9 +80,15 @@ namespace llvm { } template - friend class IntrusiveRefCntPtr; + friend struct IntrusiveRefCntPtrInfo; }; + + template struct IntrusiveRefCntPtrInfo { + static void retain(T *obj) { obj->Retain(); } + static void release(T *obj) { obj->Release(); } + }; + //===----------------------------------------------------------------------===// /// IntrusiveRefCntPtr - A template class that implements a "smart pointer" /// that assumes the wrapped object has a reference count associated @@ -105,7 +115,7 @@ namespace llvm { explicit IntrusiveRefCntPtr() : Obj(0) {} - explicit IntrusiveRefCntPtr(T* obj) : Obj(obj) { + IntrusiveRefCntPtr(T* obj) : Obj(obj) { retain(); } @@ -153,14 +163,19 @@ namespace llvm { other.Obj = Obj; Obj = tmp; } - + + void reset() { + release(); + Obj = 0; + } + void resetWithoutRelease() { Obj = 0; } private: - void retain() { if (Obj) Obj->Retain(); } - void release() { if (Obj) Obj->Release(); } + void retain() { if (Obj) IntrusiveRefCntPtrInfo::retain(Obj); } + void release() { if (Obj) IntrusiveRefCntPtrInfo::release(Obj); } 
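A minimal usage sketch for the reference-counting changes above (hypothetical, not part of the import): a type derives from RefCountedBase so that IntrusiveRefCntPtr can manage it. Note that construction from a raw pointer is no longer explicit, and the new reset() drops the held reference.

#include "llvm/ADT/IntrusiveRefCntPtr.h"

class Node : public llvm::RefCountedBase<Node> {
public:
  int Value;
  explicit Node(int V) : Value(V) {}
};

void refcountDemo() {
  llvm::IntrusiveRefCntPtr<Node> P = new Node(42); // implicit construction from Node*
  llvm::IntrusiveRefCntPtr<Node> Q = P;            // reference count is now 2
  Q.reset();                                       // releases Q's reference; Q becomes null
} // P is destroyed here, the count reaches 0, and the Node is deleted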
void replace(T* S) { this_type(S).swap(*this); diff --git a/contrib/llvm/include/llvm/ADT/PointerIntPair.h b/contrib/llvm/include/llvm/ADT/PointerIntPair.h index 85dbba2b4a4a..ccdcd1a8d1b9 100644 --- a/contrib/llvm/include/llvm/ADT/PointerIntPair.h +++ b/contrib/llvm/include/llvm/ADT/PointerIntPair.h @@ -92,10 +92,14 @@ class PointerIntPair { } PointerTy const *getAddrOfPointer() const { + return const_cast(this)->getAddrOfPointer(); + } + + PointerTy *getAddrOfPointer() { assert(Value == reinterpret_cast(getPointer()) && "Can only return the address if IntBits is cleared and " "PtrTraits doesn't change the pointer"); - return reinterpret_cast(&Value); + return reinterpret_cast(&Value); } void *getOpaqueValue() const { return reinterpret_cast(Value); } diff --git a/contrib/llvm/include/llvm/ADT/PointerUnion.h b/contrib/llvm/include/llvm/ADT/PointerUnion.h index 487096a17105..614b59c844e3 100644 --- a/contrib/llvm/include/llvm/ADT/PointerUnion.h +++ b/contrib/llvm/include/llvm/ADT/PointerUnion.h @@ -142,16 +142,19 @@ namespace llvm { return T(); } - /// \brief If the union is set to the first pointer type we can get an - /// address pointing to it. - template - PT1 const *getAddrOf() const { + /// \brief If the union is set to the first pointer type get an address + /// pointing to it. + PT1 const *getAddrOfPtr1() const { + return const_cast(this)->getAddrOfPtr1(); + } + + /// \brief If the union is set to the first pointer type get an address + /// pointing to it. + PT1 *getAddrOfPtr1() { assert(is() && "Val is not the first pointer"); assert(get() == Val.getPointer() && "Can't get the address because PointerLikeTypeTraits changes the ptr"); - T const *can_only_get_address_of_first_pointer_type - = reinterpret_cast(Val.getAddrOfPointer()); - return can_only_get_address_of_first_pointer_type; + return (PT1 *)Val.getAddrOfPointer(); } /// Assignment operators - Allow assigning into this union from either @@ -263,7 +266,7 @@ namespace llvm { ::llvm::PointerUnionTypeSelector >::Return Ty; - return Ty(Val).is(); + return Ty(Val).template is(); } /// get() - Return the value of the specified pointer type. 
If the @@ -276,7 +279,7 @@ namespace llvm { ::llvm::PointerUnionTypeSelector >::Return Ty; - return Ty(Val).get(); + return Ty(Val).template get(); } /// dyn_cast() - If the current value is of the specified pointer type, diff --git a/contrib/llvm/include/llvm/ADT/SetVector.h b/contrib/llvm/include/llvm/ADT/SetVector.h index abe20676d54d..965f0deacaa2 100644 --- a/contrib/llvm/include/llvm/ADT/SetVector.h +++ b/contrib/llvm/include/llvm/ADT/SetVector.h @@ -144,6 +144,12 @@ class SetVector { set_.erase(back()); vector_.pop_back(); } + + T pop_back_val() { + T Ret = back(); + pop_back(); + return Ret; + } bool operator==(const SetVector &that) const { return vector_ == that.vector_; diff --git a/contrib/llvm/include/llvm/ADT/SmallBitVector.h b/contrib/llvm/include/llvm/ADT/SmallBitVector.h index b15b3ee0418f..a3469a1c6226 100644 --- a/contrib/llvm/include/llvm/ADT/SmallBitVector.h +++ b/contrib/llvm/include/llvm/ADT/SmallBitVector.h @@ -175,7 +175,7 @@ class SmallBitVector { return CountPopulation_32(Bits); if (sizeof(uintptr_t) * CHAR_BIT == 64) return CountPopulation_64(Bits); - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } return getPointer()->count(); } @@ -212,7 +212,7 @@ class SmallBitVector { return CountTrailingZeros_32(Bits); if (sizeof(uintptr_t) * CHAR_BIT == 64) return CountTrailingZeros_64(Bits); - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } return getPointer()->find_first(); } @@ -230,7 +230,7 @@ class SmallBitVector { return CountTrailingZeros_32(Bits); if (sizeof(uintptr_t) * CHAR_BIT == 64) return CountTrailingZeros_64(Bits); - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } return getPointer()->find_next(Prev); } diff --git a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h index 9992858d67b0..70693d5b9aa2 100644 --- a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h @@ -137,6 +137,10 @@ class SmallPtrSetImpl { void operator=(const SmallPtrSetImpl &RHS); // DO NOT IMPLEMENT. protected: + /// swap - Swaps the elements of two sets. + /// Note: This method assumes that both sets have the same small size. + void swap(SmallPtrSetImpl &RHS); + void CopyFrom(const SmallPtrSetImpl &RHS); }; @@ -287,8 +291,20 @@ class SmallPtrSet : public SmallPtrSetImpl { return *this; } + /// swap - Swaps the elements of two sets. + void swap(SmallPtrSet &RHS) { + SmallPtrSetImpl::swap(RHS); + } }; } +namespace std { + /// Implement std::swap in terms of SmallPtrSet swap. + template + inline void swap(llvm::SmallPtrSet &LHS, llvm::SmallPtrSet &RHS) { + LHS.swap(RHS); + } +} + #endif diff --git a/contrib/llvm/include/llvm/ADT/SmallSet.h b/contrib/llvm/include/llvm/ADT/SmallSet.h index d03f1bef15b1..cd117f59ba76 100644 --- a/contrib/llvm/include/llvm/ADT/SmallSet.h +++ b/contrib/llvm/include/llvm/ADT/SmallSet.h @@ -27,13 +27,13 @@ namespace llvm { /// /// Note that this set does not provide a way to iterate over members in the /// set. -template +template > class SmallSet { /// Use a SmallVector to hold the elements here (even though it will never /// reach its 'large' stage) to avoid calling the default ctors of elements /// we will never use. 
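The SetVector::pop_back_val() addition above supports the usual worklist idiom; a hypothetical sketch, not part of the imported sources:

#include "llvm/ADT/SetVector.h"

void drainWorklist(llvm::SetVector<int> &Worklist) {
  while (!Worklist.empty()) {
    // Removes the most recently inserted unique element and returns it.
    int Item = Worklist.pop_back_val();
    (void)Item; // ... process Item, possibly inserting new work ...
  }
}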
SmallVector Vector; - std::set Set; + std::set Set; typedef typename SmallVector::const_iterator VIterator; typedef typename SmallVector::iterator mutable_iterator; public: diff --git a/contrib/llvm/include/llvm/ADT/SmallString.h b/contrib/llvm/include/llvm/ADT/SmallString.h index da264164821f..199783ba3899 100644 --- a/contrib/llvm/include/llvm/ADT/SmallString.h +++ b/contrib/llvm/include/llvm/ADT/SmallString.h @@ -24,21 +24,244 @@ namespace llvm { template class SmallString : public SmallVector { public: - // Default ctor - Initialize to empty. + /// Default ctor - Initialize to empty. SmallString() {} - // Initialize from a StringRef. + /// Initialize from a StringRef. SmallString(StringRef S) : SmallVector(S.begin(), S.end()) {} - // Initialize with a range. + /// Initialize with a range. template SmallString(ItTy S, ItTy E) : SmallVector(S, E) {} - // Copy ctor. + /// Copy ctor. SmallString(const SmallString &RHS) : SmallVector(RHS) {} + // Note that in order to add new overloads for append & assign, we have to + // duplicate the inherited versions so as not to inadvertently hide them. + + /// @} + /// @name String Assignment + /// @{ + + /// Assign from a repeated element + void assign(unsigned NumElts, char Elt) { + this->SmallVectorImpl::assign(NumElts, Elt); + } + + /// Assign from an iterator pair + template + void assign(in_iter S, in_iter E) { + this->clear(); + SmallVectorImpl::append(S, E); + } + + /// Assign from a StringRef + void assign(StringRef RHS) { + this->clear(); + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// Assign from a SmallVector + void assign(const SmallVectorImpl &RHS) { + this->clear(); + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// @} + /// @name String Concatenation + /// @{ + + /// Append from an iterator pair + template + void append(in_iter S, in_iter E) { + SmallVectorImpl::append(S, E); + } + + /// Append from a StringRef + void append(StringRef RHS) { + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// Append from a SmallVector + void append(const SmallVectorImpl &RHS) { + SmallVectorImpl::append(RHS.begin(), RHS.end()); + } + + /// @} + /// @name String Comparison + /// @{ + + /// equals - Check for string equality, this is more efficient than + /// compare() when the relative ordering of inequal strings isn't needed. + bool equals(StringRef RHS) const { + return str().equals(RHS); + } + + /// equals_lower - Check for string equality, ignoring case. + bool equals_lower(StringRef RHS) const { + return str().equals_lower(RHS); + } + + /// compare - Compare two strings; the result is -1, 0, or 1 if this string + /// is lexicographically less than, equal to, or greater than the \arg RHS. + int compare(StringRef RHS) const { + return str().compare(RHS); + } + + /// compare_lower - Compare two strings, ignoring case. + int compare_lower(StringRef RHS) const { + return str().compare_lower(RHS); + } + + /// compare_numeric - Compare two strings, treating sequences of digits as + /// numbers. + int compare_numeric(StringRef RHS) const { + return str().compare_numeric(RHS); + } + + /// @} + /// @name String Predicates + /// @{ + + /// startswith - Check if this string starts with the given \arg Prefix. + bool startswith(StringRef Prefix) const { + return str().startswith(Prefix); + } + + /// endswith - Check if this string ends with the given \arg Suffix. 
+ bool endswith(StringRef Suffix) const { + return str().endswith(Suffix); + } + + /// @} + /// @name String Searching + /// @{ + + /// find - Search for the first character \arg C in the string. + /// + /// \return - The index of the first occurrence of \arg C, or npos if not + /// found. + size_t find(char C, size_t From = 0) const { + return str().find(C, From); + } + + /// find - Search for the first string \arg Str in the string. + /// + /// \return - The index of the first occurrence of \arg Str, or npos if not + /// found. + size_t find(StringRef Str, size_t From = 0) const { + return str().find(Str, From); + } + + /// rfind - Search for the last character \arg C in the string. + /// + /// \return - The index of the last occurrence of \arg C, or npos if not + /// found. + size_t rfind(char C, size_t From = StringRef::npos) const { + return str().rfind(C, From); + } + + /// rfind - Search for the last string \arg Str in the string. + /// + /// \return - The index of the last occurrence of \arg Str, or npos if not + /// found. + size_t rfind(StringRef Str) const { + return str().rfind(Str); + } + + /// find_first_of - Find the first character in the string that is \arg C, + /// or npos if not found. Same as find. + size_t find_first_of(char C, size_t From = 0) const { + return str().find_first_of(C, From); + } + + /// find_first_of - Find the first character in the string that is in \arg + /// Chars, or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_t find_first_of(StringRef Chars, size_t From = 0) const { + return str().find_first_of(Chars, From); + } + + /// find_first_not_of - Find the first character in the string that is not + /// \arg C or npos if not found. + size_t find_first_not_of(char C, size_t From = 0) const { + return str().find_first_not_of(C, From); + } + + /// find_first_not_of - Find the first character in the string that is not + /// in the string \arg Chars, or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_t find_first_not_of(StringRef Chars, size_t From = 0) const { + return str().find_first_not_of(Chars, From); + } + + /// find_last_of - Find the last character in the string that is \arg C, or + /// npos if not found. + size_t find_last_of(char C, size_t From = StringRef::npos) const { + return str().find_last_of(C, From); + } + + /// find_last_of - Find the last character in the string that is in \arg C, + /// or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_t find_last_of( + StringRef Chars, size_t From = StringRef::npos) const { + return str().find_last_of(Chars, From); + } + + /// @} + /// @name Helpful Algorithms + /// @{ + + /// count - Return the number of occurrences of \arg C in the string. + size_t count(char C) const { + return str().count(C); + } + + /// count - Return the number of non-overlapped occurrences of \arg Str in + /// the string. + size_t count(StringRef Str) const { + return str().count(Str); + } + + /// @} + /// @name Substring Operations + /// @{ + + /// substr - Return a reference to the substring from [Start, Start + N). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param N - The number of characters to included in the substring. If N + /// exceeds the number of characters remaining in the string, the string + /// suffix (starting with \arg Start) will be returned. 
+ StringRef substr(size_t Start, size_t N = StringRef::npos) const { + return str().substr(Start, N); + } + + /// slice - Return a reference to the substring from [Start, End). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param End - The index following the last character to include in the + /// substring. If this is npos, or less than \arg Start, or exceeds the + /// number of characters remaining in the string, the string suffix + /// (starting with \arg Start) will be returned. + StringRef slice(size_t Start, size_t End) const { + return str().slice(Start, End); + } // Extra methods. + + /// Explicit conversion to StringRef StringRef str() const { return StringRef(this->begin(), this->size()); } // TODO: Make this const, if it's safe... @@ -48,7 +271,7 @@ class SmallString : public SmallVector { return this->data(); } - // Implicit conversion to StringRef. + /// Implicit conversion to StringRef. operator StringRef() const { return str(); } // Extra operators. diff --git a/contrib/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm/include/llvm/ADT/SmallVector.h index 1c42f29771b3..0d9d0d12e868 100644 --- a/contrib/llvm/include/llvm/ADT/SmallVector.h +++ b/contrib/llvm/include/llvm/ADT/SmallVector.h @@ -23,30 +23,6 @@ #include #include -#ifdef _MSC_VER -namespace std { -#if _MSC_VER <= 1310 - // Work around flawed VC++ implementation of std::uninitialized_copy. Define - // additional overloads so that elements with pointer types are recognized as - // scalars and not objects, causing bizarre type conversion errors. - template - inline _Scalar_ptr_iterator_tag _Ptr_cat(T1 **, T2 **) { - _Scalar_ptr_iterator_tag _Cat; - return _Cat; - } - - template - inline _Scalar_ptr_iterator_tag _Ptr_cat(T1* const *, T2 **) { - _Scalar_ptr_iterator_tag _Cat; - return _Cat; - } -#else -// FIXME: It is not clear if the problem is fixed in VS 2005. What is clear -// is that the above hack won't work if it wasn't fixed. -#endif -} -#endif - namespace llvm { /// SmallVectorBase - This is all the non-templated stuff common to all @@ -100,10 +76,10 @@ class SmallVectorBase { template class SmallVectorTemplateCommon : public SmallVectorBase { protected: - void setEnd(T *P) { this->EndX = P; } -public: SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {} + void setEnd(T *P) { this->EndX = P; } +public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T value_type; @@ -174,7 +150,7 @@ class SmallVectorTemplateCommon : public SmallVectorBase { /// implementations that are designed to work with non-POD-like T's. template class SmallVectorTemplateBase : public SmallVectorTemplateCommon { -public: +protected: SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} static void destroy_range(T *S, T *E) { @@ -194,6 +170,23 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon { /// grow - double the size of the allocated memory, guaranteeing space for at /// least one more element or MinSize if specified. void grow(size_t MinSize = 0); + +public: + void push_back(const T &Elt) { + if (this->EndX < this->CapacityX) { + Retry: + new (this->end()) T(Elt); + this->setEnd(this->end()+1); + return; + } + this->grow(); + goto Retry; + } + + void pop_back() { + this->setEnd(this->end()-1); + this->end()->~T(); + } }; // Define this out-of-line to dissuade the C++ compiler from inlining it. 
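The SmallString additions above forward most StringRef queries so they can be used directly on the in-place buffer. A hypothetical sketch of the resulting style, not part of the import:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"

bool looksLikeCHeader(llvm::StringRef Path) {
  llvm::SmallString<64> Buf;
  Buf.append(Path);                    // new append(StringRef) overload
  if (!Buf.startswith("include/"))     // forwarded to StringRef::startswith
    return false;
  size_t Dot = Buf.rfind('.');         // forwarded to StringRef::rfind
  return Dot != llvm::StringRef::npos && Buf.substr(Dot) == ".h";
}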
@@ -226,7 +219,7 @@ void SmallVectorTemplateBase::grow(size_t MinSize) { /// implementations that are designed to work with POD-like T's. template class SmallVectorTemplateBase : public SmallVectorTemplateCommon { -public: +protected: SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} // No need to do a destroy loop for POD's. @@ -255,6 +248,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon { void grow(size_t MinSize = 0) { this->grow_pod(MinSize*sizeof(T), sizeof(T)); } +public: + void push_back(const T &Elt) { + if (this->EndX < this->CapacityX) { + Retry: + *this->end() = Elt; + this->setEnd(this->end()+1); + return; + } + this->grow(); + goto Retry; + } + + void pop_back() { + this->setEnd(this->end()-1); + } }; @@ -270,11 +278,13 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { typedef typename SuperClass::iterator iterator; typedef typename SuperClass::size_type size_type; +protected: // Default ctor - Initialize to empty. explicit SmallVectorImpl(unsigned N) : SmallVectorTemplateBase::value>(N*sizeof(T)) { } +public: ~SmallVectorImpl() { // Destroy the constructed elements in the vector. this->destroy_range(this->begin(), this->end()); @@ -297,7 +307,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { } else if (N > this->size()) { if (this->capacity() < N) this->grow(N); - this->construct_range(this->end(), this->begin()+N, T()); + std::uninitialized_fill(this->end(), this->begin()+N, T()); this->setEnd(this->begin()+N); } } @@ -309,7 +319,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { } else if (N > this->size()) { if (this->capacity() < N) this->grow(N); - construct_range(this->end(), this->begin()+N, NV); + std::uninitialized_fill(this->end(), this->begin()+N, NV); this->setEnd(this->begin()+N); } } @@ -319,25 +329,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { this->grow(N); } - void push_back(const T &Elt) { - if (this->EndX < this->CapacityX) { - Retry: - new (this->end()) T(Elt); - this->setEnd(this->end()+1); - return; - } - this->grow(); - goto Retry; - } - - void pop_back() { - this->setEnd(this->end()-1); - this->end()->~T(); - } - T pop_back_val() { T Result = this->back(); - pop_back(); + this->pop_back(); return Result; } @@ -376,7 +370,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { if (this->capacity() < NumElts) this->grow(NumElts); this->setEnd(this->begin()+NumElts); - construct_range(this->begin(), this->end(), Elt); + std::uninitialized_fill(this->begin(), this->end(), Elt); } iterator erase(iterator I) { @@ -384,7 +378,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { // Shift all elts down one. std::copy(I+1, this->end(), I); // Drop the last elt. - pop_back(); + this->pop_back(); return(N); } @@ -400,7 +394,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { iterator insert(iterator I, const T &Elt) { if (I == this->end()) { // Important special case for empty vector. 
- push_back(Elt); + this->push_back(Elt); return this->end()-1; } @@ -554,12 +548,6 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { assert(N <= this->capacity()); this->setEnd(this->begin() + N); } - -private: - static void construct_range(T *S, T *E, const T &Elt) { - for (; S != E; ++S) - new (S) T(Elt); - } }; @@ -686,9 +674,7 @@ class SmallVector : public SmallVectorImpl { explicit SmallVector(unsigned Size, const T &Value = T()) : SmallVectorImpl(NumTsAvailable) { - this->reserve(Size); - while (Size--) - this->push_back(Value); + this->assign(Size, Value); } template @@ -718,9 +704,7 @@ class SmallVector : public SmallVectorImpl { explicit SmallVector(unsigned Size, const T &Value = T()) : SmallVectorImpl(0) { - this->reserve(Size); - while (Size--) - this->push_back(Value); + this->assign(Size, Value); } template diff --git a/contrib/llvm/include/llvm/ADT/SparseBitVector.h b/contrib/llvm/include/llvm/ADT/SparseBitVector.h index d977136b2fc1..89774c3f5628 100644 --- a/contrib/llvm/include/llvm/ADT/SparseBitVector.h +++ b/contrib/llvm/include/llvm/ADT/SparseBitVector.h @@ -18,11 +18,11 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include -#include namespace llvm { @@ -128,7 +128,7 @@ struct SparseBitVectorElement else if (sizeof(BitWord) == 8) NumBits += CountPopulation_64(Bits[i]); else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); return NumBits; } @@ -138,13 +138,11 @@ struct SparseBitVectorElement if (Bits[i] != 0) { if (sizeof(BitWord) == 4) return i * BITWORD_SIZE + CountTrailingZeros_32(Bits[i]); - else if (sizeof(BitWord) == 8) + if (sizeof(BitWord) == 8) return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]); - else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } - assert(0 && "Illegal empty element"); - return 0; // Not reached + llvm_unreachable("Illegal empty element"); } /// find_next - Returns the index of the next set bit starting from the @@ -165,10 +163,9 @@ struct SparseBitVectorElement if (Copy != 0) { if (sizeof(BitWord) == 4) return WordPos * BITWORD_SIZE + CountTrailingZeros_32(Copy); - else if (sizeof(BitWord) == 8) + if (sizeof(BitWord) == 8) return WordPos * BITWORD_SIZE + CountTrailingZeros_64(Copy); - else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } // Check subsequent words. @@ -176,10 +173,9 @@ struct SparseBitVectorElement if (Bits[i] != 0) { if (sizeof(BitWord) == 4) return i * BITWORD_SIZE + CountTrailingZeros_32(Bits[i]); - else if (sizeof(BitWord) == 8) + if (sizeof(BitWord) == 8) return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]); - else - assert(0 && "Unsupported!"); + llvm_unreachable("Unsupported!"); } return -1; } @@ -264,15 +260,6 @@ struct SparseBitVectorElement } BecameZero = allzero; } - - // Get a hash value for this element; - uint64_t getHashValue() const { - uint64_t HashVal = 0; - for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { - HashVal ^= Bits[i]; - } - return HashVal; - } }; template @@ -813,18 +800,6 @@ class SparseBitVector { iterator end() const { return iterator(this, true); } - - // Get a hash value for this bitmap. 
- uint64_t getHashValue() const { - uint64_t HashVal = 0; - for (ElementListConstIter Iter = Elements.begin(); - Iter != Elements.end(); - ++Iter) { - HashVal ^= Iter->index(); - HashVal ^= Iter->getHashValue(); - } - return HashVal; - } }; // Convenience functions to allow Or and And without dereferencing in the user diff --git a/contrib/llvm/include/llvm/ADT/SparseSet.h b/contrib/llvm/include/llvm/ADT/SparseSet.h new file mode 100644 index 000000000000..923c6a5954d0 --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/SparseSet.h @@ -0,0 +1,268 @@ +//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SparseSet class derived from the version described in +// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters +// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993. +// +// A sparse set holds a small number of objects identified by integer keys from +// a moderately sized universe. The sparse set uses more memory than other +// containers in order to provide faster operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SPARSESET_H +#define LLVM_ADT_SPARSESET_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" +#include + +namespace llvm { + +/// SparseSetFunctor - Objects in a SparseSet are identified by small integer +/// keys. A functor object is used to compute the key of an object. The +/// functor's operator() must return an unsigned smaller than the universe. +/// +/// The default functor implementation forwards to a getSparseSetKey() method +/// on the object. It is intended for sparse sets holding ad-hoc structs. +/// +template +struct SparseSetFunctor { + unsigned operator()(const ValueT &Val) { + return Val.getSparseSetKey(); + } +}; + +/// SparseSetFunctor - Provide a trivial identity functor for +/// SparseSet. +/// +template<> struct SparseSetFunctor { + unsigned operator()(unsigned Val) { return Val; } +}; + +/// SparseSet - Fast set implementation for objects that can be identified by +/// small unsigned keys. +/// +/// SparseSet allocates memory proportional to the size of the key universe, so +/// it is not recommended for building composite data structures. It is useful +/// for algorithms that require a single set with fast operations. +/// +/// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast +/// clear() and iteration as fast as a vector. The find(), insert(), and +/// erase() operations are all constant time, and typically faster than a hash +/// table. The iteration order doesn't depend on numerical key values, it only +/// depends on the order of insert() and erase() operations. When no elements +/// have been erased, the iteration order is the insertion order. +/// +/// Compared to BitVector, SparseSet uses 8x-40x more memory, but +/// offers constant-time clear() and size() operations as well as fast +/// iteration independent on the size of the universe. +/// +/// SparseSet contains a dense vector holding all the objects and a sparse +/// array holding indexes into the dense vector. Most of the memory is used by +/// the sparse array which is the size of the key universe. 
The SparseT +/// template parameter provides a space/speed tradeoff for sets holding many +/// elements. +/// +/// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse +/// array uses 4 x Universe bytes. +/// +/// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache +/// lines, but the sparse array is 4x smaller. N is the number of elements in +/// the set. +/// +/// For sets that may grow to thousands of elements, SparseT should be set to +/// uint16_t or uint32_t. +/// +/// @param ValueT The type of objects in the set. +/// @param SparseT An unsigned integer type. See above. +/// @param KeyFunctorT A functor that computes the unsigned key of a ValueT. +/// +template > +class SparseSet { + typedef SmallVector DenseT; + DenseT Dense; + SparseT *Sparse; + unsigned Universe; + KeyFunctorT KeyOf; + + // Disable copy construction and assignment. + // This data structure is not meant to be used that way. + SparseSet(const SparseSet&); // DO NOT IMPLEMENT. + SparseSet &operator=(const SparseSet&); // DO NOT IMPLEMENT. + +public: + typedef ValueT value_type; + typedef ValueT &reference; + typedef const ValueT &const_reference; + typedef ValueT *pointer; + typedef const ValueT *const_pointer; + + SparseSet() : Sparse(0), Universe(0) {} + ~SparseSet() { free(Sparse); } + + /// setUniverse - Set the universe size which determines the largest key the + /// set can hold. The universe must be sized before any elements can be + /// added. + /// + /// @param U Universe size. All object keys must be less than U. + /// + void setUniverse(unsigned U) { + // It's not hard to resize the universe on a non-empty set, but it doesn't + // seem like a likely use case, so we can add that code when we need it. + assert(empty() && "Can only resize universe on an empty map"); + // Hysteresis prevents needless reallocations. + if (U >= Universe/4 && U <= Universe) + return; + free(Sparse); + // The Sparse array doesn't actually need to be initialized, so malloc + // would be enough here, but that will cause tools like valgrind to + // complain about branching on uninitialized data. + Sparse = reinterpret_cast(calloc(U, sizeof(SparseT))); + Universe = U; + } + + // Import trivial vector stuff from DenseT. + typedef typename DenseT::iterator iterator; + typedef typename DenseT::const_iterator const_iterator; + + const_iterator begin() const { return Dense.begin(); } + const_iterator end() const { return Dense.end(); } + iterator begin() { return Dense.begin(); } + iterator end() { return Dense.end(); } + + /// empty - Returns true if the set is empty. + /// + /// This is not the same as BitVector::empty(). + /// + bool empty() const { return Dense.empty(); } + + /// size - Returns the number of elements in the set. + /// + /// This is not the same as BitVector::size() which returns the size of the + /// universe. + /// + unsigned size() const { return Dense.size(); } + + /// clear - Clears the set. This is a very fast constant time operation. + /// + void clear() { + // Sparse does not need to be cleared, see find(). + Dense.clear(); + } + + /// find - Find an element by its key. + /// + /// @param Key A valid key to find. + /// @returns An iterator to the element identified by key, or end(). 
+ /// + iterator find(unsigned Key) { + assert(Key < Universe && "Key out of range"); + assert(std::numeric_limits::is_integer && + !std::numeric_limits::is_signed && + "SparseT must be an unsigned integer type"); + const unsigned Stride = std::numeric_limits::max() + 1u; + for (unsigned i = Sparse[Key], e = size(); i < e; i += Stride) { + const unsigned FoundKey = KeyOf(Dense[i]); + assert(FoundKey < Universe && "Invalid key in set. Did object mutate?"); + if (Key == FoundKey) + return begin() + i; + // Stride is 0 when SparseT >= unsigned. We don't need to loop. + if (!Stride) + break; + } + return end(); + } + + const_iterator find(unsigned Key) const { + return const_cast(this)->find(Key); + } + + /// count - Returns true if this set contains an element identified by Key. + /// + bool count(unsigned Key) const { + return find(Key) != end(); + } + + /// insert - Attempts to insert a new element. + /// + /// If Val is successfully inserted, return (I, true), where I is an iterator + /// pointing to the newly inserted element. + /// + /// If the set already contains an element with the same key as Val, return + /// (I, false), where I is an iterator pointing to the existing element. + /// + /// Insertion invalidates all iterators. + /// + std::pair insert(const ValueT &Val) { + unsigned Key = KeyOf(Val); + iterator I = find(Key); + if (I != end()) + return std::make_pair(I, false); + Sparse[Key] = size(); + Dense.push_back(Val); + return std::make_pair(end() - 1, true); + } + + /// array subscript - If an element already exists with this key, return it. + /// Otherwise, automatically construct a new value from Key, insert it, + /// and return the newly inserted element. + ValueT &operator[](unsigned Key) { + return *insert(ValueT(Key)).first; + } + + /// erase - Erases an existing element identified by a valid iterator. + /// + /// This invalidates all iterators, but erase() returns an iterator pointing + /// to the next element. This makes it possible to erase selected elements + /// while iterating over the set: + /// + /// for (SparseSet::iterator I = Set.begin(); I != Set.end();) + /// if (test(*I)) + /// I = Set.erase(I); + /// else + /// ++I; + /// + /// Note that end() changes when elements are erased, unlike std::list. + /// + iterator erase(iterator I) { + assert(unsigned(I - begin()) < size() && "Invalid iterator"); + if (I != end() - 1) { + *I = Dense.back(); + unsigned BackKey = KeyOf(Dense.back()); + assert(BackKey < Universe && "Invalid key in set. Did object mutate?"); + Sparse[BackKey] = I - begin(); + } + // This depends on SmallVector::pop_back() not invalidating iterators. + // std::vector::pop_back() doesn't give that guarantee. + Dense.pop_back(); + return I; + } + + /// erase - Erases an element identified by Key, if it exists. + /// + /// @param Key The key identifying the element to erase. + /// @returns True when an element was erased, false if no element was found. 
+ /// + bool erase(unsigned Key) { + iterator I = find(Key); + if (I == end()) + return false; + erase(I); + return true; + } + +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/ADT/Statistic.h b/contrib/llvm/include/llvm/ADT/Statistic.h index b8a1a2f5c4e8..b54d10b9dd33 100644 --- a/contrib/llvm/include/llvm/ADT/Statistic.h +++ b/contrib/llvm/include/llvm/ADT/Statistic.h @@ -27,6 +27,7 @@ #define LLVM_ADT_STATISTIC_H #include "llvm/Support/Atomic.h" +#include "llvm/Support/Valgrind.h" namespace llvm { class raw_ostream; @@ -110,6 +111,7 @@ class Statistic { bool tmp = Initialized; sys::MemoryFence(); if (!tmp) RegisterStatistic(); + TsanHappensAfter(this); return *this; } void RegisterStatistic(); diff --git a/contrib/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm/include/llvm/ADT/StringExtras.h index d01d3e1d6b10..655d884e7baa 100644 --- a/contrib/llvm/include/llvm/ADT/StringExtras.h +++ b/contrib/llvm/include/llvm/ADT/StringExtras.h @@ -15,12 +15,7 @@ #define LLVM_ADT_STRINGEXTRAS_H #include "llvm/Support/DataTypes.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/StringRef.h" -#include -#include -#include namespace llvm { template class SmallVectorImpl; @@ -101,38 +96,6 @@ static inline std::string itostr(int64_t X) { return utostr(static_cast(X)); } -static inline std::string ftostr(double V) { - char Buffer[200]; - sprintf(Buffer, "%20.6e", V); - char *B = Buffer; - while (*B == ' ') ++B; - return B; -} - -static inline std::string ftostr(const APFloat& V) { - if (&V.getSemantics() == &APFloat::IEEEdouble) - return ftostr(V.convertToDouble()); - else if (&V.getSemantics() == &APFloat::IEEEsingle) - return ftostr((double)V.convertToFloat()); - return ""; // error -} - -static inline std::string LowercaseString(const std::string &S) { - std::string result(S); - for (unsigned i = 0; i < S.length(); ++i) - if (isupper(result[i])) - result[i] = char(tolower(result[i])); - return result; -} - -static inline std::string UppercaseString(const std::string &S) { - std::string result(S); - for (unsigned i = 0; i < S.length(); ++i) - if (islower(result[i])) - result[i] = char(toupper(result[i])); - return result; -} - /// StrInStrNoCase - Portable version of strcasestr. Locates the first /// occurrence of string 's1' in string 's2', ignoring case. Returns /// the offset of s2 in s1 or npos if s2 cannot be found. diff --git a/contrib/llvm/include/llvm/ADT/StringMap.h b/contrib/llvm/include/llvm/ADT/StringMap.h index 35077879351b..097418efc817 100644 --- a/contrib/llvm/include/llvm/ADT/StringMap.h +++ b/contrib/llvm/include/llvm/ADT/StringMap.h @@ -51,20 +51,11 @@ class StringMapEntryBase { /// StringMapImpl - This is the base class of StringMap that is shared among /// all of its instantiations. class StringMapImpl { -public: - /// ItemBucket - The hash table consists of an array of these. If Item is - /// non-null, this is an extant entry, otherwise, it is a hole. - struct ItemBucket { - /// FullHashValue - This remembers the full hash value of the key for - /// easy scanning. - unsigned FullHashValue; - - /// Item - This is a pointer to the actual item object. - StringMapEntryBase *Item; - }; - protected: - ItemBucket *TheTable; + // Array of NumBuckets pointers to entries, null pointers are holes. + // TheTable[NumBuckets] contains a sentinel value for easy iteration. Follwed + // by an array of the actual hash values as unsigned integers. 
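To summarize the SparseSet container added above, a hypothetical usage sketch (not part of the imported sources); with plain unsigned keys the identity SparseSetFunctor specialization applies:

#include "llvm/ADT/SparseSet.h"

void sparseSetDemo() {
  llvm::SparseSet<unsigned> Live;
  Live.setUniverse(128);   // must be called before inserting; all keys < 128
  Live.insert(5);
  Live.insert(17);
  if (Live.count(5))
    Live.erase(5);         // constant-time erase by key
  Live.clear();            // constant time; the sparse array is left untouched
}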
+ StringMapEntryBase **TheTable; unsigned NumBuckets; unsigned NumItems; unsigned NumTombstones; @@ -238,8 +229,9 @@ class StringMapEntry : public StringMapEntryBase { template class StringMap : public StringMapImpl { AllocatorTy Allocator; - typedef StringMapEntry MapEntryTy; public: + typedef StringMapEntry MapEntryTy; + StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} explicit StringMap(unsigned InitialSize) : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} @@ -289,13 +281,13 @@ class StringMap : public StringMapImpl { iterator find(StringRef Key) { int Bucket = FindKey(Key); if (Bucket == -1) return end(); - return iterator(TheTable+Bucket); + return iterator(TheTable+Bucket, true); } const_iterator find(StringRef Key) const { int Bucket = FindKey(Key); if (Bucket == -1) return end(); - return const_iterator(TheTable+Bucket); + return const_iterator(TheTable+Bucket, true); } /// lookup - Return the entry for the specified key, or a default @@ -320,13 +312,13 @@ class StringMap : public StringMapImpl { /// insert it and return true. bool insert(MapEntryTy *KeyValue) { unsigned BucketNo = LookupBucketFor(KeyValue->getKey()); - ItemBucket &Bucket = TheTable[BucketNo]; - if (Bucket.Item && Bucket.Item != getTombstoneVal()) + StringMapEntryBase *&Bucket = TheTable[BucketNo]; + if (Bucket && Bucket != getTombstoneVal()) return false; // Already exists in map. - if (Bucket.Item == getTombstoneVal()) + if (Bucket == getTombstoneVal()) --NumTombstones; - Bucket.Item = KeyValue; + Bucket = KeyValue; ++NumItems; assert(NumItems + NumTombstones <= NumBuckets); @@ -340,10 +332,11 @@ class StringMap : public StringMapImpl { // Zap all values, resetting the keys back to non-present (not tombstone), // which is safe because we're removing all elements. - for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) { - if (I->Item && I->Item != getTombstoneVal()) { - static_cast(I->Item)->Destroy(Allocator); - I->Item = 0; + for (unsigned I = 0, E = NumBuckets; I != E; ++I) { + StringMapEntryBase *&Bucket = TheTable[I]; + if (Bucket && Bucket != getTombstoneVal()) { + static_cast(Bucket)->Destroy(Allocator); + Bucket = 0; } } @@ -357,21 +350,21 @@ class StringMap : public StringMapImpl { template MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) { unsigned BucketNo = LookupBucketFor(Key); - ItemBucket &Bucket = TheTable[BucketNo]; - if (Bucket.Item && Bucket.Item != getTombstoneVal()) - return *static_cast(Bucket.Item); + StringMapEntryBase *&Bucket = TheTable[BucketNo]; + if (Bucket && Bucket != getTombstoneVal()) + return *static_cast(Bucket); MapEntryTy *NewItem = MapEntryTy::Create(Key.begin(), Key.end(), Allocator, Val); - if (Bucket.Item == getTombstoneVal()) + if (Bucket == getTombstoneVal()) --NumTombstones; ++NumItems; assert(NumItems + NumTombstones <= NumBuckets); // Fill in the bucket for the hash table. The FullHashValue was already // filled in by LookupBucketFor. 
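For context on the interface this bucket-layout rework preserves, a hypothetical StringMap usage sketch (not part of the import); GetOrCreateValue is the insertion path being modified here:

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"

void countWord(llvm::StringMap<unsigned> &Counts, llvm::StringRef Word) {
  // Creates the entry with value 0 the first time Word is seen.
  llvm::StringMapEntry<unsigned> &E = Counts.GetOrCreateValue(Word, 0u);
  E.setValue(E.getValue() + 1);
}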
- Bucket.Item = NewItem; + Bucket = NewItem; RehashTable(); return *NewItem; @@ -410,21 +403,21 @@ class StringMap : public StringMapImpl { template class StringMapConstIterator { protected: - StringMapImpl::ItemBucket *Ptr; + StringMapEntryBase **Ptr; public: typedef StringMapEntry value_type; - explicit StringMapConstIterator(StringMapImpl::ItemBucket *Bucket, + explicit StringMapConstIterator(StringMapEntryBase **Bucket, bool NoAdvance = false) : Ptr(Bucket) { if (!NoAdvance) AdvancePastEmptyBuckets(); } const value_type &operator*() const { - return *static_cast*>(Ptr->Item); + return *static_cast*>(*Ptr); } const value_type *operator->() const { - return static_cast*>(Ptr->Item); + return static_cast*>(*Ptr); } bool operator==(const StringMapConstIterator &RHS) const { @@ -445,7 +438,7 @@ class StringMapConstIterator { private: void AdvancePastEmptyBuckets() { - while (Ptr->Item == 0 || Ptr->Item == StringMapImpl::getTombstoneVal()) + while (*Ptr == 0 || *Ptr == StringMapImpl::getTombstoneVal()) ++Ptr; } }; @@ -453,15 +446,15 @@ class StringMapConstIterator { template class StringMapIterator : public StringMapConstIterator { public: - explicit StringMapIterator(StringMapImpl::ItemBucket *Bucket, + explicit StringMapIterator(StringMapEntryBase **Bucket, bool NoAdvance = false) : StringMapConstIterator(Bucket, NoAdvance) { } StringMapEntry &operator*() const { - return *static_cast*>(this->Ptr->Item); + return *static_cast*>(*this->Ptr); } StringMapEntry *operator->() const { - return static_cast*>(this->Ptr->Item); + return static_cast*>(*this->Ptr); } }; diff --git a/contrib/llvm/include/llvm/ADT/StringRef.h b/contrib/llvm/include/llvm/ADT/StringRef.h index 8396921744ad..76ba66e746ce 100644 --- a/contrib/llvm/include/llvm/ADT/StringRef.h +++ b/contrib/llvm/include/llvm/ADT/StringRef.h @@ -10,15 +10,26 @@ #ifndef LLVM_ADT_STRINGREF_H #define LLVM_ADT_STRINGREF_H +#include "llvm/Support/type_traits.h" + #include #include -#include +#include #include +#include namespace llvm { template class SmallVectorImpl; class APInt; + class hash_code; + class StringRef; + + /// Helper functions for StringRef::getAsInteger. + bool getAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result); + + bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); /// StringRef - Represent a constant reference to a string, i.e. a character /// array and a length, which need not be null terminated. @@ -304,14 +315,29 @@ namespace llvm { /// /// If the string is invalid or if only a subset of the string is valid, /// this returns true to signify the error. The string is considered - /// erroneous if empty. + /// erroneous if empty or if it overflows T. /// - bool getAsInteger(unsigned Radix, long long &Result) const; - bool getAsInteger(unsigned Radix, unsigned long long &Result) const; - bool getAsInteger(unsigned Radix, int &Result) const; - bool getAsInteger(unsigned Radix, unsigned &Result) const; + template + typename enable_if_c::is_signed, bool>::type + getAsInteger(unsigned Radix, T &Result) const { + long long LLVal; + if (getAsSignedInteger(*this, Radix, LLVal) || + static_cast(LLVal) != LLVal) + return true; + Result = LLVal; + return false; + } - // TODO: Provide overloads for int/unsigned that check for overflow. 
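The templated getAsInteger() above replaces the fixed-width overloads and, per the updated comment, also fails when the parsed value does not fit the destination type. A hypothetical caller, not part of the import:

#include "llvm/ADT/StringRef.h"

bool parseOffset(llvm::StringRef S, int &Offset) {
  // Returns true on error: empty string, invalid digits for radix 10,
  // or a value that overflows 'int'.
  return S.getAsInteger(10, Offset);
}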
+ template + typename enable_if_c::is_signed, bool>::type + getAsInteger(unsigned Radix, T &Result) const { + unsigned long long ULLVal; + if (getAsUnsignedInteger(*this, Radix, ULLVal) || + static_cast(ULLVal) != ULLVal) + return true; + Result = ULLVal; + return false; + } /// getAsInteger - Parse the current string as an integer of the /// specified radix, or of an autosensed radix if the radix given @@ -326,6 +352,16 @@ namespace llvm { /// string is well-formed in the given radix. bool getAsInteger(unsigned Radix, APInt &Result) const; + /// @} + /// @name String Operations + /// @{ + + // lower - Convert the given ASCII string to lowercase. + std::string lower() const; + + /// upper - Convert the given ASCII string to uppercase. + std::string upper() const; + /// @} /// @name Substring Operations /// @{ @@ -343,6 +379,20 @@ namespace llvm { Start = min(Start, Length); return StringRef(Data + Start, min(N, Length - Start)); } + + /// drop_front - Return a StringRef equal to 'this' but with the first + /// elements dropped. + StringRef drop_front(unsigned N = 1) const { + assert(size() >= N && "Dropping more elements than exist"); + return substr(N); + } + + /// drop_back - Return a StringRef equal to 'this' but with the last + /// elements dropped. + StringRef drop_back(unsigned N = 1) const { + assert(size() >= N && "Dropping more elements than exist"); + return substr(0, size()-N); + } /// slice - Return a reference to the substring from [Start, End). /// @@ -466,6 +516,9 @@ namespace llvm { /// @} + /// \brief Compute a hash_code for a StringRef. + hash_code hash_value(StringRef S); + // StringRefs can be treated like a POD type. template struct isPodLike; template <> struct isPodLike { static const bool value = true; }; diff --git a/contrib/llvm/include/llvm/ADT/TinyPtrVector.h b/contrib/llvm/include/llvm/ADT/TinyPtrVector.h index ee86d8bdf70f..5014517c9e05 100644 --- a/contrib/llvm/include/llvm/ADT/TinyPtrVector.h +++ b/contrib/llvm/include/llvm/ADT/TinyPtrVector.h @@ -37,6 +37,15 @@ class TinyPtrVector { delete V; } + // implicit conversion operator to ArrayRef. + operator ArrayRef() const { + if (Val.isNull()) + return ArrayRef(); + if (Val.template is()) + return *Val.getAddrOfPtr1(); + return *Val.template get(); + } + bool empty() const { // This vector can be empty if it contains no element, or if it // contains a pointer to an empty vector. @@ -54,18 +63,20 @@ class TinyPtrVector { return Val.template get()->size(); } - typedef const EltTy *iterator; - iterator begin() const { + typedef const EltTy *const_iterator; + typedef EltTy *iterator; + + iterator begin() { if (empty()) return 0; if (Val.template is()) - return Val.template getAddrOf(); + return Val.getAddrOfPtr1(); return Val.template get()->begin(); } - iterator end() const { + iterator end() { if (empty()) return 0; @@ -75,7 +86,14 @@ class TinyPtrVector { return Val.template get()->end(); } - + const_iterator begin() const { + return (const_iterator)const_cast(this)->begin(); + } + + const_iterator end() const { + return (const_iterator)const_cast(this)->end(); + } + EltTy operator[](unsigned i) const { assert(!Val.isNull() && "can't index into an empty vector"); if (EltTy V = Val.template dyn_cast()) { @@ -124,6 +142,20 @@ class TinyPtrVector { } // Otherwise, we're already empty. } + + iterator erase(iterator I) { + // If we have a single value, convert to empty. 
+ if (Val.template is()) { + if (I == begin()) + Val = (EltTy)0; + } else if (VecTy *Vec = Val.template dyn_cast()) { + // multiple items in a vector; just do the erase, there is no + // benefit to collapsing back to a pointer + return Vec->erase(I); + } + + return 0; + } private: void operator=(const TinyPtrVector&); // NOT IMPLEMENTED YET. diff --git a/contrib/llvm/include/llvm/ADT/Trie.h b/contrib/llvm/include/llvm/ADT/Trie.h index 6b150c8fffa0..845af015b052 100644 --- a/contrib/llvm/include/llvm/ADT/Trie.h +++ b/contrib/llvm/include/llvm/ADT/Trie.h @@ -220,8 +220,7 @@ bool Trie::addString(const std::string& s, const Payload& data) { assert(0 && "FIXME!"); return false; case Node::DontMatch: - assert(0 && "Impossible!"); - return false; + llvm_unreachable("Impossible!"); case Node::LabelIsPrefix: s1 = s1.substr(nNode->label().length()); cNode = nNode; @@ -258,8 +257,7 @@ const Payload& Trie::lookup(const std::string& s) const { case Node::StringIsPrefix: return Empty; case Node::DontMatch: - assert(0 && "Impossible!"); - return Empty; + llvm_unreachable("Impossible!"); case Node::LabelIsPrefix: s1 = s1.substr(nNode->label().length()); cNode = nNode; diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h index 3503c0f22145..f5f99d0f1b82 100644 --- a/contrib/llvm/include/llvm/ADT/Triple.h +++ b/contrib/llvm/include/llvm/ADT/Triple.h @@ -43,20 +43,19 @@ class Triple { enum ArchType { UnknownArch, - alpha, // Alpha: alpha arm, // ARM; arm, armv.*, xscale - bfin, // Blackfin: bfin cellspu, // CellSPU: spu, cellspu + hexagon, // Hexagon: hexagon mips, // MIPS: mips, mipsallegrex - mipsel, // MIPSEL: mipsel, mipsallegrexel, psp + mipsel, // MIPSEL: mipsel, mipsallegrexel mips64, // MIPS64: mips64 mips64el,// MIPS64EL: mips64el msp430, // MSP430: msp430 ppc, // PPC: powerpc ppc64, // PPC64: powerpc64, ppu + r600, // R600: AMD GPUs HD2XXX - HD6XXX sparc, // Sparc: sparc sparcv9, // Sparcv9: Sparcv9 - systemz, // SystemZ: s390x tce, // TCE (http://tce.cs.tut.fi/): tce thumb, // Thumb: thumb, thumbv.* x86, // X86: i[3-9]86 @@ -66,16 +65,16 @@ class Triple { ptx32, // PTX: ptx (32-bit) ptx64, // PTX: ptx (64-bit) le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) - amdil, // amdil: amd IL - - InvalidArch + amdil // amdil: amd IL }; enum VendorType { UnknownVendor, Apple, PC, - SCEI + SCEI, + BGP, + BGQ }; enum OSType { UnknownOS, @@ -93,61 +92,52 @@ class Triple { MinGW32, // i*86-pc-mingw32, *-w64-mingw32 NetBSD, OpenBSD, - Psp, Solaris, Win32, Haiku, Minix, RTEMS, - NativeClient + NativeClient, + CNK // BG/P Compute-Node Kernel }; enum EnvironmentType { UnknownEnvironment, GNU, GNUEABI, + GNUEABIHF, EABI, - MachO + MachO, + ANDROIDEABI }; private: std::string Data; - /// The parsed arch type (or InvalidArch if uninitialized). - mutable ArchType Arch; + /// The parsed arch type. + ArchType Arch; /// The parsed vendor type. - mutable VendorType Vendor; + VendorType Vendor; /// The parsed OS type. - mutable OSType OS; + OSType OS; /// The parsed Environment type. 
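Referring back to the TinyPtrVector changes earlier in this hunk, a hypothetical sketch of the new implicit ArrayRef conversion (not part of the imported sources):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/TinyPtrVector.h"

int sumAll(const llvm::TinyPtrVector<int *> &V) {
  // The conversion operator lets a TinyPtrVector be passed wherever an
  // ArrayRef of its element type is expected.
  llvm::ArrayRef<int *> Elements = V;
  int Sum = 0;
  for (unsigned i = 0, e = Elements.size(); i != e; ++i)
    Sum += *Elements[i];
  return Sum;
}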
- mutable EnvironmentType Environment; - - bool isInitialized() const { return Arch != InvalidArch; } - static ArchType ParseArch(StringRef ArchName); - static VendorType ParseVendor(StringRef VendorName); - static OSType ParseOS(StringRef OSName); - static EnvironmentType ParseEnvironment(StringRef EnvironmentName); - void Parse() const; + EnvironmentType Environment; public: /// @name Constructors /// @{ - Triple() : Data(), Arch(InvalidArch) {} - explicit Triple(const Twine &Str) : Data(Str.str()), Arch(InvalidArch) {} - Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr) - : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()), - Arch(InvalidArch) { - } + /// \brief Default constructor is the same as an empty string and leaves all + /// triple fields unknown. + Triple() : Data(), Arch(), Vendor(), OS(), Environment() {} + explicit Triple(const Twine &Str); + Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr, - const Twine &EnvironmentStr) - : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') + - EnvironmentStr).str()), Arch(InvalidArch) { - } + const Twine &EnvironmentStr); /// @} /// @name Normalization @@ -164,22 +154,13 @@ class Triple { /// @{ /// getArch - Get the parsed architecture type of this triple. - ArchType getArch() const { - if (!isInitialized()) Parse(); - return Arch; - } + ArchType getArch() const { return Arch; } /// getVendor - Get the parsed vendor type of this triple. - VendorType getVendor() const { - if (!isInitialized()) Parse(); - return Vendor; - } + VendorType getVendor() const { return Vendor; } /// getOS - Get the parsed operating system type of this triple. - OSType getOS() const { - if (!isInitialized()) Parse(); - return OS; - } + OSType getOS() const { return OS; } /// hasEnvironment - Does this triple have the optional environment /// (fourth) component? @@ -188,11 +169,31 @@ class Triple { } /// getEnvironment - Get the parsed environment type of this triple. - EnvironmentType getEnvironment() const { - if (!isInitialized()) Parse(); - return Environment; + EnvironmentType getEnvironment() const { return Environment; } + + /// getOSVersion - Parse the version number from the OS name component of the + /// triple, if present. + /// + /// For example, "fooos1.2.3" would return (1, 2, 3). + /// + /// If an entry is not defined, it will be returned as 0. + void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; + + /// getOSMajorVersion - Return just the major version number, this is + /// specialized because it is a common query. + unsigned getOSMajorVersion() const { + unsigned Maj, Min, Micro; + getOSVersion(Maj, Min, Micro); + return Maj; } + /// getMacOSXVersion - Parse the version number as with getOSVersion and then + /// translate generic "darwin" versions to the corresponding OS X versions. + /// This may also be called with IOS triples but the OS X version number is + /// just set to a constant 10.4.0 in that case. Returns true if successful. + bool getMacOSXVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const; + /// @} /// @name Direct Component Access /// @{ @@ -221,21 +222,28 @@ class Triple { /// if the environment component is present). StringRef getOSAndEnvironmentName() const; - /// getOSVersion - Parse the version number from the OS name component of the - /// triple, if present. - /// - /// For example, "fooos1.2.3" would return (1, 2, 3). 
- /// - /// If an entry is not defined, it will be returned as 0. - void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; + /// @} + /// @name Convenience Predicates + /// @{ - /// getOSMajorVersion - Return just the major version number, this is - /// specialized because it is a common query. - unsigned getOSMajorVersion() const { - unsigned Maj, Min, Micro; - getOSVersion(Maj, Min, Micro); - return Maj; - } + /// \brief Test whether the architecture is 64-bit + /// + /// Note that this tests for 64-bit pointer width, and nothing else. Note + /// that we intentionally expose only three predicates, 64-bit, 32-bit, and + /// 16-bit. The inner details of pointer width for particular architectures + /// is not summed up in the triple, and so only a coarse grained predicate + /// system is provided. + bool isArch64Bit() const; + + /// \brief Test whether the architecture is 32-bit + /// + /// Note that this tests for 32-bit pointer width, and nothing else. + bool isArch32Bit() const; + + /// \brief Test whether the architecture is 16-bit + /// + /// Note that this tests for 16-bit pointer width, and nothing else. + bool isArch16Bit() const; /// isOSVersionLT - Helper function for doing comparisons against version /// numbers included in the target triple. @@ -254,23 +262,6 @@ class Triple { return false; } - /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both - /// "darwin" and "osx" as OS X triples. - bool isMacOSX() const { - return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; - } - - /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS). - bool isOSDarwin() const { - return isMacOSX() || getOS() == Triple::IOS; - } - - /// isOSWindows - Is this a "Windows" OS. - bool isOSWindows() const { - return getOS() == Triple::Win32 || getOS() == Triple::Cygwin || - getOS() == Triple::MinGW32; - } - /// isMacOSXVersionLT - Comparison function for checking OS X version /// compatibility, which handles supporting skewed version numbering schemes /// used by the "darwin" triples. @@ -287,6 +278,43 @@ class Triple { return isOSVersionLT(Minor + 4, Micro, 0); } + /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both + /// "darwin" and "osx" as OS X triples. + bool isMacOSX() const { + return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; + } + + /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS). + bool isOSDarwin() const { + return isMacOSX() || getOS() == Triple::IOS; + } + + /// \brief Tests for either Cygwin or MinGW OS + bool isOSCygMing() const { + return getOS() == Triple::Cygwin || getOS() == Triple::MinGW32; + } + + /// isOSWindows - Is this a "Windows" OS. + bool isOSWindows() const { + return getOS() == Triple::Win32 || isOSCygMing(); + } + + /// \brief Tests whether the OS uses the ELF binary format. + bool isOSBinFormatELF() const { + return !isOSDarwin() && !isOSWindows(); + } + + /// \brief Tests whether the OS uses the COFF binary format. + bool isOSBinFormatCOFF() const { + return isOSWindows(); + } + + /// \brief Tests whether the environment is MachO. + // FIXME: Should this be an OSBinFormat predicate? + bool isEnvironmentMachO() const { + return getEnvironment() == Triple::MachO || isOSDarwin(); + } + /// @} /// @name Mutators /// @{ @@ -334,6 +362,26 @@ class Triple { /// the target assembler. const char *getArchNameForAssembler(); + /// @} + /// @name Helpers to build variants of a particular triple. + /// @{ + + /// \brief Form a triple with a 32-bit variant of the current architecture. 
+ /// + /// This can be used to move across "families" of architectures where useful. + /// + /// \returns A new triple with a 32-bit architecture or an unknown + /// architecture if no such variant can be found. + llvm::Triple get32BitArchVariant() const; + + /// \brief Form a triple with a 64-bit variant of the current architecture. + /// + /// This can be used to move across "families" of architectures where useful. + /// + /// \returns A new triple with a 64-bit architecture or an unknown + /// architecture if no such variant can be found. + llvm::Triple get64BitArchVariant() const; + /// @} /// @name Static helpers for IDs. /// @{ diff --git a/contrib/llvm/include/llvm/ADT/Twine.h b/contrib/llvm/include/llvm/ADT/Twine.h index 3a60cab77935..9101df8cee37 100644 --- a/contrib/llvm/include/llvm/ADT/Twine.h +++ b/contrib/llvm/include/llvm/ADT/Twine.h @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" #include #include @@ -425,7 +426,7 @@ namespace llvm { StringRef getSingleStringRef() const { assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); switch (getLHSKind()) { - default: assert(0 && "Out of sync with isSingleStringRef"); + default: llvm_unreachable("Out of sync with isSingleStringRef"); case EmptyKind: return StringRef(); case CStringKind: return StringRef(LHS.cString); case StdStringKind: return StringRef(*LHS.stdString); diff --git a/contrib/llvm/include/llvm/ADT/ValueMap.h b/contrib/llvm/include/llvm/ADT/ValueMap.h index d1f4e5a0dacd..707d07d32cbc 100644 --- a/contrib/llvm/include/llvm/ADT/ValueMap.h +++ b/contrib/llvm/include/llvm/ADT/ValueMap.h @@ -35,7 +35,7 @@ namespace llvm { -template +template class ValueMapCallbackVH; template @@ -72,13 +72,11 @@ struct ValueMapConfig { }; /// See the file comment. -template, - typename ValueInfoT = DenseMapInfo > +template > class ValueMap { - friend class ValueMapCallbackVH; - typedef ValueMapCallbackVH ValueMapCVH; - typedef DenseMap, - ValueInfoT> MapT; + friend class ValueMapCallbackVH; + typedef ValueMapCallbackVH ValueMapCVH; + typedef DenseMap > MapT; typedef typename Config::ExtraData ExtraData; MapT Map; ExtraData Data; @@ -190,11 +188,11 @@ class ValueMap { // This CallbackVH updates its ValueMap when the contained Value changes, // according to the user's preferences expressed through the Config object. -template +template class ValueMapCallbackVH : public CallbackVH { - friend class ValueMap; + friend class ValueMap; friend struct DenseMapInfo; - typedef ValueMap ValueMapT; + typedef ValueMap ValueMapT; typedef typename llvm::remove_pointer::type KeySansPointerT; ValueMapT *Map; @@ -244,9 +242,9 @@ class ValueMapCallbackVH : public CallbackVH { } }; -template -struct DenseMapInfo > { - typedef ValueMapCallbackVH VH; +template +struct DenseMapInfo > { + typedef ValueMapCallbackVH VH; typedef DenseMapInfo PointerInfo; static inline VH getEmptyKey() { diff --git a/contrib/llvm/include/llvm/ADT/VariadicFunction.h b/contrib/llvm/include/llvm/ADT/VariadicFunction.h new file mode 100644 index 000000000000..a9a0dc6b6e20 --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/VariadicFunction.h @@ -0,0 +1,331 @@ +//===--- VariadicFunctions.h - Variadic Functions ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
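As an aside to the Triple changes above, the new pointer-width predicates and arch-variant helpers compose naturally; the following is a minimal sketch under the usual in-tree implementation (the helper name pickPointerWidthVariant is hypothetical, not part of this import):

    #include "llvm/ADT/Triple.h"

    // Return a triple with the requested pointer width when a sibling
    // architecture exists; otherwise the result has an unknown architecture.
    llvm::Triple pickPointerWidthVariant(const llvm::Triple &T, bool Want64Bit) {
      if (Want64Bit && T.isArch32Bit())
        return T.get64BitArchVariant();
      if (!Want64Bit && T.isArch64Bit())
        return T.get32BitArchVariant();
      return T;
    }

With the matching Triple.cpp changes, an "i386-unknown-freebsd10.0" input should come back as "x86_64-unknown-freebsd10.0" when the 64-bit variant is requested.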
+// +//===----------------------------------------------------------------------===// +// +// This file implements compile-time type-safe variadic functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_VARIADIC_FUNCTION_H +#define LLVM_ADT_VARIADIC_FUNCTION_H + +#include "llvm/ADT/ArrayRef.h" + +namespace llvm { + +// Define macros to aid in expanding a comma separated series with the index of +// the series pasted onto the last token. +#define LLVM_COMMA_JOIN1(x) x ## 0 +#define LLVM_COMMA_JOIN2(x) LLVM_COMMA_JOIN1(x), x ## 1 +#define LLVM_COMMA_JOIN3(x) LLVM_COMMA_JOIN2(x), x ## 2 +#define LLVM_COMMA_JOIN4(x) LLVM_COMMA_JOIN3(x), x ## 3 +#define LLVM_COMMA_JOIN5(x) LLVM_COMMA_JOIN4(x), x ## 4 +#define LLVM_COMMA_JOIN6(x) LLVM_COMMA_JOIN5(x), x ## 5 +#define LLVM_COMMA_JOIN7(x) LLVM_COMMA_JOIN6(x), x ## 6 +#define LLVM_COMMA_JOIN8(x) LLVM_COMMA_JOIN7(x), x ## 7 +#define LLVM_COMMA_JOIN9(x) LLVM_COMMA_JOIN8(x), x ## 8 +#define LLVM_COMMA_JOIN10(x) LLVM_COMMA_JOIN9(x), x ## 9 +#define LLVM_COMMA_JOIN11(x) LLVM_COMMA_JOIN10(x), x ## 10 +#define LLVM_COMMA_JOIN12(x) LLVM_COMMA_JOIN11(x), x ## 11 +#define LLVM_COMMA_JOIN13(x) LLVM_COMMA_JOIN12(x), x ## 12 +#define LLVM_COMMA_JOIN14(x) LLVM_COMMA_JOIN13(x), x ## 13 +#define LLVM_COMMA_JOIN15(x) LLVM_COMMA_JOIN14(x), x ## 14 +#define LLVM_COMMA_JOIN16(x) LLVM_COMMA_JOIN15(x), x ## 15 +#define LLVM_COMMA_JOIN17(x) LLVM_COMMA_JOIN16(x), x ## 16 +#define LLVM_COMMA_JOIN18(x) LLVM_COMMA_JOIN17(x), x ## 17 +#define LLVM_COMMA_JOIN19(x) LLVM_COMMA_JOIN18(x), x ## 18 +#define LLVM_COMMA_JOIN20(x) LLVM_COMMA_JOIN19(x), x ## 19 +#define LLVM_COMMA_JOIN21(x) LLVM_COMMA_JOIN20(x), x ## 20 +#define LLVM_COMMA_JOIN22(x) LLVM_COMMA_JOIN21(x), x ## 21 +#define LLVM_COMMA_JOIN23(x) LLVM_COMMA_JOIN22(x), x ## 22 +#define LLVM_COMMA_JOIN24(x) LLVM_COMMA_JOIN23(x), x ## 23 +#define LLVM_COMMA_JOIN25(x) LLVM_COMMA_JOIN24(x), x ## 24 +#define LLVM_COMMA_JOIN26(x) LLVM_COMMA_JOIN25(x), x ## 25 +#define LLVM_COMMA_JOIN27(x) LLVM_COMMA_JOIN26(x), x ## 26 +#define LLVM_COMMA_JOIN28(x) LLVM_COMMA_JOIN27(x), x ## 27 +#define LLVM_COMMA_JOIN29(x) LLVM_COMMA_JOIN28(x), x ## 28 +#define LLVM_COMMA_JOIN30(x) LLVM_COMMA_JOIN29(x), x ## 29 +#define LLVM_COMMA_JOIN31(x) LLVM_COMMA_JOIN30(x), x ## 30 +#define LLVM_COMMA_JOIN32(x) LLVM_COMMA_JOIN31(x), x ## 31 + +/// \brief Class which can simulate a type-safe variadic function. +/// +/// The VariadicFunction class template makes it easy to define +/// type-safe variadic functions where all arguments have the same +/// type. +/// +/// Suppose we need a variadic function like this: +/// +/// ResultT Foo(const ArgT &A_0, const ArgT &A_1, ..., const ArgT &A_N); +/// +/// Instead of many overloads of Foo(), we only need to define a helper +/// function that takes an array of arguments: +/// +/// ResultT FooImpl(ArrayRef Args) { +/// // 'Args[i]' is a pointer to the i-th argument passed to Foo(). +/// ... +/// } +/// +/// and then define Foo() like this: +/// +/// const VariadicFunction Foo; +/// +/// VariadicFunction takes care of defining the overloads of Foo(). +/// +/// Actually, Foo is a function object (i.e. functor) instead of a plain +/// function. This object is stateless and its constructor/destructor +/// does nothing, so it's safe to create global objects and call Foo(...) at +/// any time. +/// +/// Sometimes we need a variadic function to have some fixed leading +/// arguments whose types may be different from that of the optional +/// arguments. 
For example: +/// +/// bool FullMatch(const StringRef &S, const RE &Regex, +/// const ArgT &A_0, ..., const ArgT &A_N); +/// +/// VariadicFunctionN is for such cases, where N is the number of fixed +/// arguments. It is like VariadicFunction, except that it takes N more +/// template arguments for the types of the fixed arguments: +/// +/// bool FullMatchImpl(const StringRef &S, const RE &Regex, +/// ArrayRef Args) { ... } +/// const VariadicFunction2 +/// FullMatch; +/// +/// Currently VariadicFunction and friends support up-to 3 +/// fixed leading arguments and up-to 32 optional arguments. +template )> +struct VariadicFunction { + ResultT operator()() const { + return Func(ArrayRef()); + } + +#define LLVM_DEFINE_OVERLOAD(N) \ + ResultT operator()(LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ + const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ + return Func(makeArrayRef(Args)); \ + } + LLVM_DEFINE_OVERLOAD(1) + LLVM_DEFINE_OVERLOAD(2) + LLVM_DEFINE_OVERLOAD(3) + LLVM_DEFINE_OVERLOAD(4) + LLVM_DEFINE_OVERLOAD(5) + LLVM_DEFINE_OVERLOAD(6) + LLVM_DEFINE_OVERLOAD(7) + LLVM_DEFINE_OVERLOAD(8) + LLVM_DEFINE_OVERLOAD(9) + LLVM_DEFINE_OVERLOAD(10) + LLVM_DEFINE_OVERLOAD(11) + LLVM_DEFINE_OVERLOAD(12) + LLVM_DEFINE_OVERLOAD(13) + LLVM_DEFINE_OVERLOAD(14) + LLVM_DEFINE_OVERLOAD(15) + LLVM_DEFINE_OVERLOAD(16) + LLVM_DEFINE_OVERLOAD(17) + LLVM_DEFINE_OVERLOAD(18) + LLVM_DEFINE_OVERLOAD(19) + LLVM_DEFINE_OVERLOAD(20) + LLVM_DEFINE_OVERLOAD(21) + LLVM_DEFINE_OVERLOAD(22) + LLVM_DEFINE_OVERLOAD(23) + LLVM_DEFINE_OVERLOAD(24) + LLVM_DEFINE_OVERLOAD(25) + LLVM_DEFINE_OVERLOAD(26) + LLVM_DEFINE_OVERLOAD(27) + LLVM_DEFINE_OVERLOAD(28) + LLVM_DEFINE_OVERLOAD(29) + LLVM_DEFINE_OVERLOAD(30) + LLVM_DEFINE_OVERLOAD(31) + LLVM_DEFINE_OVERLOAD(32) +#undef LLVM_DEFINE_OVERLOAD +}; + +template )> +struct VariadicFunction1 { + ResultT operator()(Param0T P0) const { + return Func(P0, ArrayRef()); + } + +#define LLVM_DEFINE_OVERLOAD(N) \ + ResultT operator()(Param0T P0, LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ + const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ + return Func(P0, makeArrayRef(Args)); \ + } + LLVM_DEFINE_OVERLOAD(1) + LLVM_DEFINE_OVERLOAD(2) + LLVM_DEFINE_OVERLOAD(3) + LLVM_DEFINE_OVERLOAD(4) + LLVM_DEFINE_OVERLOAD(5) + LLVM_DEFINE_OVERLOAD(6) + LLVM_DEFINE_OVERLOAD(7) + LLVM_DEFINE_OVERLOAD(8) + LLVM_DEFINE_OVERLOAD(9) + LLVM_DEFINE_OVERLOAD(10) + LLVM_DEFINE_OVERLOAD(11) + LLVM_DEFINE_OVERLOAD(12) + LLVM_DEFINE_OVERLOAD(13) + LLVM_DEFINE_OVERLOAD(14) + LLVM_DEFINE_OVERLOAD(15) + LLVM_DEFINE_OVERLOAD(16) + LLVM_DEFINE_OVERLOAD(17) + LLVM_DEFINE_OVERLOAD(18) + LLVM_DEFINE_OVERLOAD(19) + LLVM_DEFINE_OVERLOAD(20) + LLVM_DEFINE_OVERLOAD(21) + LLVM_DEFINE_OVERLOAD(22) + LLVM_DEFINE_OVERLOAD(23) + LLVM_DEFINE_OVERLOAD(24) + LLVM_DEFINE_OVERLOAD(25) + LLVM_DEFINE_OVERLOAD(26) + LLVM_DEFINE_OVERLOAD(27) + LLVM_DEFINE_OVERLOAD(28) + LLVM_DEFINE_OVERLOAD(29) + LLVM_DEFINE_OVERLOAD(30) + LLVM_DEFINE_OVERLOAD(31) + LLVM_DEFINE_OVERLOAD(32) +#undef LLVM_DEFINE_OVERLOAD +}; + +template )> +struct VariadicFunction2 { + ResultT operator()(Param0T P0, Param1T P1) const { + return Func(P0, P1, ArrayRef()); + } + +#define LLVM_DEFINE_OVERLOAD(N) \ + ResultT operator()(Param0T P0, Param1T P1, \ + LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ + const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ + return Func(P0, P1, makeAraryRef(Args)); \ + } + LLVM_DEFINE_OVERLOAD(1) + LLVM_DEFINE_OVERLOAD(2) + LLVM_DEFINE_OVERLOAD(3) + LLVM_DEFINE_OVERLOAD(4) + LLVM_DEFINE_OVERLOAD(5) + 
LLVM_DEFINE_OVERLOAD(6) + LLVM_DEFINE_OVERLOAD(7) + LLVM_DEFINE_OVERLOAD(8) + LLVM_DEFINE_OVERLOAD(9) + LLVM_DEFINE_OVERLOAD(10) + LLVM_DEFINE_OVERLOAD(11) + LLVM_DEFINE_OVERLOAD(12) + LLVM_DEFINE_OVERLOAD(13) + LLVM_DEFINE_OVERLOAD(14) + LLVM_DEFINE_OVERLOAD(15) + LLVM_DEFINE_OVERLOAD(16) + LLVM_DEFINE_OVERLOAD(17) + LLVM_DEFINE_OVERLOAD(18) + LLVM_DEFINE_OVERLOAD(19) + LLVM_DEFINE_OVERLOAD(20) + LLVM_DEFINE_OVERLOAD(21) + LLVM_DEFINE_OVERLOAD(22) + LLVM_DEFINE_OVERLOAD(23) + LLVM_DEFINE_OVERLOAD(24) + LLVM_DEFINE_OVERLOAD(25) + LLVM_DEFINE_OVERLOAD(26) + LLVM_DEFINE_OVERLOAD(27) + LLVM_DEFINE_OVERLOAD(28) + LLVM_DEFINE_OVERLOAD(29) + LLVM_DEFINE_OVERLOAD(30) + LLVM_DEFINE_OVERLOAD(31) + LLVM_DEFINE_OVERLOAD(32) +#undef LLVM_DEFINE_OVERLOAD +}; + +template )> +struct VariadicFunction3 { + ResultT operator()(Param0T P0, Param1T P1, Param2T P2) const { + return Func(P0, P1, P2, ArrayRef()); + } + +#define LLVM_DEFINE_OVERLOAD(N) \ + ResultT operator()(Param0T P0, Param1T P1, Param2T P2, \ + LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ + const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ + return Func(P0, P1, P2, makeArrayRef(Args)); \ + } + LLVM_DEFINE_OVERLOAD(1) + LLVM_DEFINE_OVERLOAD(2) + LLVM_DEFINE_OVERLOAD(3) + LLVM_DEFINE_OVERLOAD(4) + LLVM_DEFINE_OVERLOAD(5) + LLVM_DEFINE_OVERLOAD(6) + LLVM_DEFINE_OVERLOAD(7) + LLVM_DEFINE_OVERLOAD(8) + LLVM_DEFINE_OVERLOAD(9) + LLVM_DEFINE_OVERLOAD(10) + LLVM_DEFINE_OVERLOAD(11) + LLVM_DEFINE_OVERLOAD(12) + LLVM_DEFINE_OVERLOAD(13) + LLVM_DEFINE_OVERLOAD(14) + LLVM_DEFINE_OVERLOAD(15) + LLVM_DEFINE_OVERLOAD(16) + LLVM_DEFINE_OVERLOAD(17) + LLVM_DEFINE_OVERLOAD(18) + LLVM_DEFINE_OVERLOAD(19) + LLVM_DEFINE_OVERLOAD(20) + LLVM_DEFINE_OVERLOAD(21) + LLVM_DEFINE_OVERLOAD(22) + LLVM_DEFINE_OVERLOAD(23) + LLVM_DEFINE_OVERLOAD(24) + LLVM_DEFINE_OVERLOAD(25) + LLVM_DEFINE_OVERLOAD(26) + LLVM_DEFINE_OVERLOAD(27) + LLVM_DEFINE_OVERLOAD(28) + LLVM_DEFINE_OVERLOAD(29) + LLVM_DEFINE_OVERLOAD(30) + LLVM_DEFINE_OVERLOAD(31) + LLVM_DEFINE_OVERLOAD(32) +#undef LLVM_DEFINE_OVERLOAD +}; + +// Cleanup the macro namespace. +#undef LLVM_COMMA_JOIN1 +#undef LLVM_COMMA_JOIN2 +#undef LLVM_COMMA_JOIN3 +#undef LLVM_COMMA_JOIN4 +#undef LLVM_COMMA_JOIN5 +#undef LLVM_COMMA_JOIN6 +#undef LLVM_COMMA_JOIN7 +#undef LLVM_COMMA_JOIN8 +#undef LLVM_COMMA_JOIN9 +#undef LLVM_COMMA_JOIN10 +#undef LLVM_COMMA_JOIN11 +#undef LLVM_COMMA_JOIN12 +#undef LLVM_COMMA_JOIN13 +#undef LLVM_COMMA_JOIN14 +#undef LLVM_COMMA_JOIN15 +#undef LLVM_COMMA_JOIN16 +#undef LLVM_COMMA_JOIN17 +#undef LLVM_COMMA_JOIN18 +#undef LLVM_COMMA_JOIN19 +#undef LLVM_COMMA_JOIN20 +#undef LLVM_COMMA_JOIN21 +#undef LLVM_COMMA_JOIN22 +#undef LLVM_COMMA_JOIN23 +#undef LLVM_COMMA_JOIN24 +#undef LLVM_COMMA_JOIN25 +#undef LLVM_COMMA_JOIN26 +#undef LLVM_COMMA_JOIN27 +#undef LLVM_COMMA_JOIN28 +#undef LLVM_COMMA_JOIN29 +#undef LLVM_COMMA_JOIN30 +#undef LLVM_COMMA_JOIN31 +#undef LLVM_COMMA_JOIN32 + +} // end namespace llvm + +#endif // LLVM_ADT_VARIADIC_FUNCTION_H diff --git a/contrib/llvm/include/llvm/ADT/VectorExtras.h b/contrib/llvm/include/llvm/ADT/VectorExtras.h deleted file mode 100644 index e05f585996f9..000000000000 --- a/contrib/llvm/include/llvm/ADT/VectorExtras.h +++ /dev/null @@ -1,41 +0,0 @@ -//===-- llvm/ADT/VectorExtras.h - Helpers for std::vector -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
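To make the VariadicFunction interface above concrete, here is a rough usage sketch; StrCat and StrCatImpl are hypothetical names, not part of this import:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/VariadicFunction.h"
    #include <string>

    // The implementation helper receives pointers to the actual arguments.
    std::string StrCatImpl(llvm::ArrayRef<const llvm::StringRef *> Args) {
      std::string Result;
      for (unsigned i = 0, e = Args.size(); i != e; ++i)
        Result += Args[i]->str();
      return Result;
    }

    // StrCat(...) accepts anywhere from 0 to 32 StringRef arguments.
    const llvm::VariadicFunction<std::string, llvm::StringRef, StrCatImpl>
        StrCat = {};

    // e.g.: std::string S = StrCat(llvm::StringRef("a"), llvm::StringRef("b"));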
-// -//===----------------------------------------------------------------------===// -// -// This file contains helper functions which are useful for working with the -// std::vector class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_VECTOREXTRAS_H -#define LLVM_ADT_VECTOREXTRAS_H - -#include -#include - -namespace llvm { - -/// make_vector - Helper function which is useful for building temporary vectors -/// to pass into type construction of CallInst ctors. This turns a null -/// terminated list of pointers (or other value types) into a real live vector. -/// -template -inline std::vector make_vector(T A, ...) { - va_list Args; - va_start(Args, A); - std::vector Result; - Result.push_back(A); - while (T Val = va_arg(Args, T)) - Result.push_back(Val); - va_end(Args); - return Result; -} - -} // End llvm namespace - -#endif diff --git a/contrib/llvm/include/llvm/ADT/edit_distance.h b/contrib/llvm/include/llvm/ADT/edit_distance.h new file mode 100644 index 000000000000..f77ef13fef2b --- /dev/null +++ b/contrib/llvm/include/llvm/ADT/edit_distance.h @@ -0,0 +1,102 @@ +//===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a Levenshtein distance function that works for any two +// sequences, with each element of each sequence being analogous to a character +// in a string. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_EDIT_DISTANCE_H +#define LLVM_ADT_EDIT_DISTANCE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/OwningPtr.h" +#include + +namespace llvm { + +/// \brief Determine the edit distance between two sequences. +/// +/// \param FromArray the first sequence to compare. +/// +/// \param ToArray the second sequence to compare. +/// +/// \param AllowReplacements whether to allow element replacements (change one +/// element into another) as a single operation, rather than as two operations +/// (an insertion and a removal). +/// +/// \param MaxEditDistance If non-zero, the maximum edit distance that this +/// routine is allowed to compute. If the edit distance will exceed that +/// maximum, returns \c MaxEditDistance+1. +/// +/// \returns the minimum number of element insertions, removals, or (if +/// \p AllowReplacements is \c true) replacements needed to transform one of +/// the given sequences into the other. If zero, the sequences are identical. +template +unsigned ComputeEditDistance(ArrayRef FromArray, ArrayRef ToArray, + bool AllowReplacements = true, + unsigned MaxEditDistance = 0) { + // The algorithm implemented below is the "classic" + // dynamic-programming algorithm for computing the Levenshtein + // distance, which is described here: + // + // http://en.wikipedia.org/wiki/Levenshtein_distance + // + // Although the algorithm is typically described using an m x n + // array, only two rows are used at a time, so this implemenation + // just keeps two separate vectors for those two rows. 
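+// Illustrative aside, not part of the import: the template being defined here
+// works for any element type. A hypothetical spelling check over plain chars
+// could call it as:
+//
+//   unsigned Dist = llvm::ComputeEditDistance(
+//       llvm::ArrayRef<char>("color", 5), llvm::ArrayRef<char>("colour", 6),
+//       /*AllowReplacements=*/true, /*MaxEditDistance=*/3);
+//   // Dist == 1; any result beyond MaxEditDistance is reported as Max+1 (4).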
+ typename ArrayRef::size_type m = FromArray.size(); + typename ArrayRef::size_type n = ToArray.size(); + + const unsigned SmallBufferSize = 64; + unsigned SmallBuffer[SmallBufferSize]; + llvm::OwningArrayPtr Allocated; + unsigned *Previous = SmallBuffer; + if (2*(n + 1) > SmallBufferSize) { + Previous = new unsigned [2*(n+1)]; + Allocated.reset(Previous); + } + unsigned *Current = Previous + (n + 1); + + for (unsigned i = 0; i <= n; ++i) + Previous[i] = i; + + for (typename ArrayRef::size_type y = 1; y <= m; ++y) { + Current[0] = y; + unsigned BestThisRow = Current[0]; + + for (typename ArrayRef::size_type x = 1; x <= n; ++x) { + if (AllowReplacements) { + Current[x] = std::min( + Previous[x-1] + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u), + std::min(Current[x-1], Previous[x])+1); + } + else { + if (FromArray[y-1] == ToArray[x-1]) Current[x] = Previous[x-1]; + else Current[x] = std::min(Current[x-1], Previous[x]) + 1; + } + BestThisRow = std::min(BestThisRow, Current[x]); + } + + if (MaxEditDistance && BestThisRow > MaxEditDistance) + return MaxEditDistance + 1; + + unsigned *tmp = Current; + Current = Previous; + Previous = tmp; + } + + unsigned Result = Previous[n]; + return Result; +} + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/include/llvm/ADT/ilist.h b/contrib/llvm/include/llvm/ADT/ilist.h index bcacfd9df426..ba9864a98a7e 100644 --- a/contrib/llvm/include/llvm/ADT/ilist.h +++ b/contrib/llvm/include/llvm/ADT/ilist.h @@ -652,10 +652,6 @@ struct ilist : public iplist { void push_front(const NodeTy &val) { insert(this->begin(), val); } void push_back(const NodeTy &val) { insert(this->end(), val); } - // Special forms of insert... - template void insert(iterator where, InIt first, InIt last) { - for (; first != last; ++first) insert(where, *first); - } void insert(iterator where, size_type count, const NodeTy &val) { for (; count != 0; --count) insert(where, val); } diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h index d71ba208a129..b823f71a2217 100644 --- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -327,7 +327,7 @@ class AliasAnalysis { } /// doesAccessArgPointees - Return true if functions with the specified - /// behavior are known to potentially read or write from objects pointed + /// behavior are known to potentially read or write from objects pointed /// to be their pointer-typed arguments (with arbitrary offsets). /// static bool doesAccessArgPointees(ModRefBehavior MRB) { @@ -568,6 +568,11 @@ bool isNoAliasCall(const Value *V); /// bool isIdentifiedObject(const Value *V); +/// isKnownNonNull - Return true if this pointer couldn't possibly be null by +/// its definition. This returns true for allocas, non-extern-weak globals and +/// byval arguments. +bool isKnownNonNull(const Value *V); + } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h index c4ebe4015bf8..95626d624a13 100644 --- a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -264,6 +264,7 @@ class AliasSet : public ilist_node { } void setVolatile() { Volatile = true; } +public: /// aliasesPointer - Return true if the specified pointer "may" (or must) /// alias one of the members in the set. 
/// diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyImpl.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyImpl.h index 0fb2bd7db50e..6f2ccfb19901 100644 --- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyImpl.h +++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyImpl.h @@ -24,7 +24,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include -#include #include namespace llvm { @@ -41,7 +40,7 @@ class MachineBlockFrequencyInfo; template class BlockFrequencyImpl { - DenseMap Freqs; + DenseMap Freqs; BlockProbInfoT *BPI; @@ -52,15 +51,16 @@ class BlockFrequencyImpl { const uint32_t EntryFreq; std::string getBlockName(BasicBlock *BB) const { - return BB->getNameStr(); + return BB->getName().str(); } std::string getBlockName(MachineBasicBlock *MBB) const { - std::stringstream ss; + std::string str; + raw_string_ostream ss(str); ss << "BB#" << MBB->getNumber(); if (const BasicBlock *BB = MBB->getBasicBlock()) - ss << " derived from LLVM BB " << BB->getNameStr(); + ss << " derived from LLVM BB " << BB->getName(); return ss.str(); } @@ -308,8 +308,9 @@ class BlockFrequencyImpl { public: /// getBlockFreq - Return block frequency. Return 0 if we don't have it. - BlockFrequency getBlockFreq(BlockT *BB) const { - typename DenseMap::const_iterator I = Freqs.find(BB); + BlockFrequency getBlockFreq(const BlockT *BB) const { + typename DenseMap::const_iterator + I = Freqs.find(BB); if (I != Freqs.end()) return I->second; return 0; diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h index 9e698a9f4bb1..fcab90677a48 100644 --- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h @@ -47,7 +47,7 @@ class BlockFrequencyInfo : public FunctionPass { /// that we should not rely on the value itself, but only on the comparison to /// the other block frequencies. We do this to avoid using of floating points. /// - BlockFrequency getBlockFreq(BasicBlock *BB) const; + BlockFrequency getBlockFreq(const BasicBlock *BB) const; }; } diff --git a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index a2c12ab9e824..2ced7967ed5b 100644 --- a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -17,13 +17,82 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/BranchProbability.h" namespace llvm { - +class LoopInfo; class raw_ostream; +/// \brief Analysis pass providing branch probability information. +/// +/// This is a function analysis pass which provides information on the relative +/// probabilities of each "edge" in the function's CFG where such an edge is +/// defined by a pair of basic blocks. The probability for a given block and +/// a successor block are always relative to the probabilities of the other +/// successor blocks. Another way of looking at it is that the probabilities +/// for a given block B and each of its successors should sum to exactly +/// one (100%). 
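+// Illustrative aside, not part of the import: a client pass could query the
+// reworked interface declared below roughly as follows (BB is a BasicBlock*
+// visited inside runOnFunction):
+//
+//   BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+//   if (BasicBlock *Hot = BPI.getHotSucc(BB)) {     // edge probability >= 80%
+//     BPI.printEdgeProbability(errs(), BB, Hot);
+//     uint32_t W = BPI.getEdgeWeight(BB, Hot);      // raw weight, 1..UINT32_MAX
+//     (void)W;
+//   }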
class BranchProbabilityInfo : public FunctionPass { +public: + static char ID; + + BranchProbabilityInfo() : FunctionPass(ID) { + initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnFunction(Function &F); + void print(raw_ostream &OS, const Module *M = 0) const; + + /// \brief Get an edge's probability, relative to other out-edges of the Src. + /// + /// This routine provides access to the fractional probability between zero + /// (0%) and one (100%) of this edge executing, relative to other edges + /// leaving the 'Src' block. The returned probability is never zero, and can + /// only be one if the source block has only one successor. + BranchProbability getEdgeProbability(const BasicBlock *Src, + const BasicBlock *Dst) const; + + /// \brief Test if an edge is hot relative to other out-edges of the Src. + /// + /// Check whether this edge out of the source block is 'hot'. We define hot + /// as having a relative probability >= 80%. + bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const; + + /// \brief Retrieve the hot successor of a block if one exists. + /// + /// Given a basic block, look through its successors and if one exists for + /// which \see isEdgeHot would return true, return that successor block. + BasicBlock *getHotSucc(BasicBlock *BB) const; + + /// \brief Print an edge's probability. + /// + /// Retrieves an edge's probability similarly to \see getEdgeProbability, but + /// then prints that probability to the provided stream. That stream is then + /// returned. + raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, + const BasicBlock *Dst) const; + + /// \brief Get the raw edge weight calculated for the block pair. + /// + /// This returns the raw edge weight. It is guaranteed to fall between 1 and + /// UINT32_MAX. Note that the raw edge weight is not meaningful in isolation. + /// This interface should be very carefully, and primarily by routines that + /// are updating the analysis by later calling setEdgeWeight. + uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const; + + /// \brief Set the raw edge weight for the block pair. + /// + /// This allows a pass to explicitly set the edge weight for a block. It can + /// be used when updating the CFG to update and preserve the branch + /// probability information. Read the implementation of how these edge + /// weights are calculated carefully before using! + void setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, + uint32_t Weight); + +private: + typedef std::pair Edge; // Default weight value. Used when we don't have information about the edge. // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of @@ -33,49 +102,26 @@ class BranchProbabilityInfo : public FunctionPass { // weight to just "inherit" the non-zero weight of an adjacent successor. static const uint32_t DEFAULT_WEIGHT = 16; - typedef std::pair Edge; - DenseMap Weights; - // Get sum of the block successors' weights. + /// \brief Handle to the LoopInfo analysis. + LoopInfo *LI; + + /// \brief Track the last function we run over for printing. + Function *LastF; + + /// \brief Track the set of blocks directly succeeded by a returning block. + SmallPtrSet PostDominatedByUnreachable; + + /// \brief Get sum of the block successors' weights. 
uint32_t getSumForBlock(const BasicBlock *BB) const; -public: - static char ID; - - BranchProbabilityInfo() : FunctionPass(ID) { - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnFunction(Function &F); - - // Returned value is between 1 and UINT32_MAX. Look at - // BranchProbabilityInfo.cpp for details. - uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const; - - // Look at BranchProbabilityInfo.cpp for details. Use it with caution! - void setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, - uint32_t Weight); - - // A 'Hot' edge is an edge which probability is >= 80%. - bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const; - - // Return a hot successor for the block BB or null if there isn't one. - BasicBlock *getHotSucc(BasicBlock *BB) const; - - // Return a probability as a fraction between 0 (0% probability) and - // 1 (100% probability), however the value is never equal to 0, and can be 1 - // only iff SRC block has only one successor. - BranchProbability getEdgeProbability(const BasicBlock *Src, - const BasicBlock *Dst) const; - - // Print value between 0 (0% probability) and 1 (100% probability), - // however the value is never equal to 0, and can be 1 only iff SRC block - // has only one successor. - raw_ostream &printEdgeProbability(raw_ostream &OS, BasicBlock *Src, - BasicBlock *Dst) const; + bool calcUnreachableHeuristics(BasicBlock *BB); + bool calcMetadataWeights(BasicBlock *BB); + bool calcPointerHeuristics(BasicBlock *BB); + bool calcLoopBranchHeuristics(BasicBlock *BB); + bool calcZeroHeuristics(BasicBlock *BB); + bool calcFloatingPointHeuristics(BasicBlock *BB); }; } diff --git a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h index 61614e34dacc..4704a929acf6 100644 --- a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h @@ -29,13 +29,13 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const Function *F) { - return "CFG for '" + F->getNameStr() + "' function"; + return "CFG for '" + F->getName().str() + "' function"; } static std::string getSimpleNodeLabel(const BasicBlock *Node, - const Function *Graph) { + const Function *) { if (!Node->getName().empty()) - return Node->getNameStr(); + return Node->getName().str(); std::string Str; raw_string_ostream OS(Str); @@ -45,7 +45,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static std::string getCompleteNodeLabel(const BasicBlock *Node, - const Function *Graph) { + const Function *) { std::string Str; raw_string_ostream OS(Str); @@ -95,7 +95,9 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { std::string Str; raw_string_ostream OS(Str); - OS << SI->getCaseValue(SuccNo)->getValue(); + SwitchInst::ConstCaseIt Case = + SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); + OS << Case.getCaseValue()->getValue(); return OS.str(); } return ""; diff --git a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h index b3390f47d2f3..9b5e8425ad29 100644 --- a/contrib/llvm/include/llvm/Analysis/CaptureTracking.h +++ b/contrib/llvm/include/llvm/Analysis/CaptureTracking.h @@ -14,9 +14,12 @@ #ifndef LLVM_ANALYSIS_CAPTURETRACKING_H #define LLVM_ANALYSIS_CAPTURETRACKING_H -namespace llvm { - class Value; +#include 
"llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Support/CallSite.h" +namespace llvm { /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures @@ -28,6 +31,33 @@ namespace llvm { bool ReturnCaptures, bool StoreCaptures); + /// This callback is used in conjunction with PointerMayBeCaptured. In + /// addition to the interface here, you'll need to provide your own getters + /// to see whether anything was captured. + struct CaptureTracker { + virtual ~CaptureTracker(); + + /// tooManyUses - The depth of traversal has breached a limit. There may be + /// capturing instructions that will not be passed into captured(). + virtual void tooManyUses() = 0; + + /// shouldExplore - This is the use of a value derived from the pointer. + /// To prune the search (ie., assume that none of its users could possibly + /// capture) return false. To search it, return true. + /// + /// U->getUser() is always an Instruction. + virtual bool shouldExplore(Use *U) = 0; + + /// captured - Information about the pointer was captured by the user of + /// use U. Return true to stop the traversal or false to continue looking + /// for more capturing instructions. + virtual bool captured(Use *U) = 0; + }; + + /// PointerMayBeCaptured - Visit the value and the values derived from it and + /// find values which appear to be capturing the pointer value. This feeds + /// results into and is controlled by the CaptureTracker object. + void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker); } // end namespace llvm #endif diff --git a/contrib/llvm/include/llvm/Analysis/CodeMetrics.h b/contrib/llvm/include/llvm/Analysis/CodeMetrics.h index d96dd82b3591..711607834921 100644 --- a/contrib/llvm/include/llvm/Analysis/CodeMetrics.h +++ b/contrib/llvm/include/llvm/Analysis/CodeMetrics.h @@ -1,4 +1,4 @@ -//===- CodeMetrics.h - Measures the weight of a function---------*- C++ -*-===// +//===- CodeMetrics.h - Code cost measurements -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,80 +18,75 @@ #include "llvm/ADT/DenseMap.h" namespace llvm { - + class BasicBlock; + class Function; + class Instruction; class TargetData; + class Value; - // CodeMetrics - Calculate size and a few similar metrics for a set of - // basic blocks. + /// \brief Check whether an instruction is likely to be "free" when lowered. + bool isInstructionFree(const Instruction *I, const TargetData *TD = 0); + + /// \brief Check whether a call will lower to something small. + /// + /// This tests checks whether calls to this function will lower to something + /// significantly cheaper than a traditional call, often a single + /// instruction. + bool callIsSmall(const Function *F); + + /// \brief Utility to calculate the size and a few similar metrics for a set + /// of basic blocks. struct CodeMetrics { - /// NeverInline - True if this callee should never be inlined into a - /// caller. - // bool NeverInline; + /// \brief True if this function contains a call to setjmp or other functions + /// with attribute "returns twice" without having the attribute itself. + bool exposesReturnsTwice; - // True if this function contains a call to setjmp or _setjmp - bool callsSetJmp; - - // True if this function calls itself + /// \brief True if this function calls itself. 
bool isRecursive; - // True if this function contains one or more indirect branches + /// \brief True if this function contains one or more indirect branches. bool containsIndirectBr; - /// usesDynamicAlloca - True if this function calls alloca (in the C sense). + /// \brief True if this function calls alloca (in the C sense). bool usesDynamicAlloca; - /// NumInsts, NumBlocks - Keep track of how large each function is, which - /// is used to estimate the code size cost of inlining it. - unsigned NumInsts, NumBlocks; + /// \brief Number of instructions in the analyzed blocks. + unsigned NumInsts; - /// NumBBInsts - Keeps track of basic block code size estimates. + /// \brief Number of analyzed blocks. + unsigned NumBlocks; + + /// \brief Keeps track of basic block code size estimates. DenseMap NumBBInsts; - /// NumCalls - Keep track of the number of calls to 'big' functions. + /// \brief Keep track of the number of calls to 'big' functions. unsigned NumCalls; - /// NumInlineCandidates - Keep track of the number of calls to internal - /// functions with only a single caller. These are likely targets for - /// future inlining, likely exposed by interleaved devirtualization. + /// \brief The number of calls to internal functions with a single caller. + /// + /// These are likely targets for future inlining, likely exposed by + /// interleaved devirtualization. unsigned NumInlineCandidates; - /// NumVectorInsts - Keep track of how many instructions produce vector - /// values. The inliner is being more aggressive with inlining vector - /// kernels. + /// \brief How many instructions produce vector values. + /// + /// The inliner is more aggressive with inlining vector kernels. unsigned NumVectorInsts; - /// NumRets - Keep track of how many Ret instructions the block contains. + /// \brief How many 'ret' instructions the blocks contain. unsigned NumRets; - CodeMetrics() : callsSetJmp(false), isRecursive(false), + CodeMetrics() : exposesReturnsTwice(false), isRecursive(false), containsIndirectBr(false), usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0), NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {} - /// analyzeBasicBlock - Add information about the specified basic block - /// to the current structure. + /// \brief Add information about a block to the current state. void analyzeBasicBlock(const BasicBlock *BB, const TargetData *TD = 0); - /// analyzeFunction - Add information about the specified function - /// to the current structure. + /// \brief Add information about a function to the current state. void analyzeFunction(Function *F, const TargetData *TD = 0); - - /// CountCodeReductionForConstant - Figure out an approximation for how - /// many instructions will be constant folded if the specified value is - /// constant. - unsigned CountCodeReductionForConstant(Value *V); - - /// CountBonusForConstant - Figure out an approximation for how much - /// per-call performance boost we can expect if the specified value is - /// constant. - unsigned CountBonusForConstant(Value *V); - - /// CountCodeReductionForAlloca - Figure out an approximation of how much - /// smaller the function will be if it is inlined into a context where an - /// argument becomes an alloca. 
- /// - unsigned CountCodeReductionForAlloca(Value *V); }; } diff --git a/contrib/llvm/include/llvm/Analysis/ConstantFolding.h b/contrib/llvm/include/llvm/Analysis/ConstantFolding.h index 05018fa1617a..2fdef5f0836e 100644 --- a/contrib/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/contrib/llvm/include/llvm/Analysis/ConstantFolding.h @@ -25,6 +25,7 @@ namespace llvm { class ConstantExpr; class Instruction; class TargetData; + class TargetLibraryInfo; class Function; class Type; template @@ -35,13 +36,15 @@ namespace llvm { /// Note that this fails if not all of the operands are constant. Otherwise, /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. -Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0); +Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0); /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result is /// result is returned, if not, null is returned. Constant *ConstantFoldConstantExpression(const ConstantExpr *CE, - const TargetData *TD = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0); /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the /// specified operands. If successful, the constant result is returned, if not, @@ -51,7 +54,8 @@ Constant *ConstantFoldConstantExpression(const ConstantExpr *CE, /// Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef Ops, - const TargetData *TD = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0); /// ConstantFoldCompareInstOperands - Attempt to constant fold a compare /// instruction (icmp/fcmp) with the specified operands. If it fails, it @@ -59,7 +63,8 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, /// Constant *ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, - const TargetData *TD = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0); /// ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue /// instruction with the specified operands and indices. The constant result is @@ -76,15 +81,22 @@ Constant *ConstantFoldLoadFromConstPtr(Constant *C, const TargetData *TD = 0); /// getelementptr constantexpr, return the constant value being addressed by the /// constant expression, or null if something is funny and we can't decide. Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE); - + +/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr +/// indices (with an *implied* zero pointer index that is not in the list), +/// return the constant value being addressed by a virtual load, or null if +/// something is funny and we can't decide. +Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, + ArrayRef Indices); + /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. bool canConstantFoldCallTo(const Function *F); /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
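+// Illustrative aside, not part of the import: the TargetLibraryInfo parameter
+// now threaded through these entry points is typically obtained from the pass
+// manager, e.g. roughly:
+//
+//   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+//   const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+//   if (Constant *C = ConstantFoldInstruction(I, TD, TLI))
+//     I->replaceAllUsesWith(C);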
-Constant * -ConstantFoldCall(Function *F, ArrayRef Operands); +Constant *ConstantFoldCall(Function *F, ArrayRef Operands, + const TargetLibraryInfo *TLI = 0); } #endif diff --git a/contrib/llvm/include/llvm/Analysis/DIBuilder.h b/contrib/llvm/include/llvm/Analysis/DIBuilder.h index ee24226c748f..2d109cdbf08f 100644 --- a/contrib/llvm/include/llvm/Analysis/DIBuilder.h +++ b/contrib/llvm/include/llvm/Analysis/DIBuilder.h @@ -42,6 +42,7 @@ namespace llvm { class DISubprogram; class DITemplateTypeParameter; class DITemplateValueParameter; + class DIObjCProperty; class DIBuilder { private: @@ -190,6 +191,39 @@ namespace llvm { StringRef PropertySetterName = StringRef(), unsigned PropertyAttributes = 0); + /// createObjCIVar - Create debugging information entry for Objective-C + /// instance variable. + /// @param Name Member name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param OffsetInBits Member offset. + /// @param Flags Flags to encode member attribute, e.g. private + /// @param Ty Parent type. + /// @param Property Property associated with this ivar. + DIType createObjCIVar(StringRef Name, DIFile File, + unsigned LineNo, uint64_t SizeInBits, + uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, DIType Ty, + MDNode *PropertyNode); + + /// createObjCProperty - Create debugging information entry for Objective-C + /// property. + /// @param Name Property name. + /// @param File File where this property is defined. + /// @param LineNumber Line number. + /// @param GetterName Name of the Objective C property getter selector. + /// @param SetterName Name of the Objective C property setter selector. + /// @param PropertyAttributes Objective C property attributes. + /// @param Ty Type. + DIObjCProperty createObjCProperty(StringRef Name, + DIFile File, unsigned LineNumber, + StringRef GetterName, + StringRef SetterName, + unsigned PropertyAttributes, + DIType Ty); + /// createClassType - Create debugging information entry for a class. /// @param Scope Scope in which this class is defined. /// @param Name class name. @@ -313,6 +347,10 @@ namespace llvm { DIType createTemporaryType(); DIType createTemporaryType(DIFile F); + /// createForwardDecl - Create a temporary forward-declared type. + DIType createForwardDecl(unsigned Tag, StringRef Name, DIFile F, + unsigned Line, unsigned RuntimeLang = 0); + /// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. void retainType(DIType T); @@ -407,6 +445,7 @@ namespace llvm { /// @param Ty Function type. /// @param isLocalToUnit True if this function is not externally visible.. /// @param isDefinition True if this is a function definition. + /// @param ScopeLine Set to the beginning of the scope this starts /// @param Flags e.g. is this function prototyped or not. /// This flags are used to emit dwarf attributes. /// @param isOptimized True if optimization is ON. @@ -417,6 +456,7 @@ namespace llvm { DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags = 0, bool isOptimized = false, Function *Fn = 0, @@ -470,7 +510,7 @@ namespace llvm { /// @param Scope Lexical block. /// @param File Source file. DILexicalBlockFile createLexicalBlockFile(DIDescriptor Scope, - DIFile File); + DIFile File); /// createLexicalBlock - This creates a descriptor for a lexical block /// with the specified parent context. 
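For illustration, a frontend creating a subprogram with the updated DIBuilder now passes the scope start line separately from the declaration line; a hedged sketch, in which DBuilder, FnScope, Unit and FnTy are placeholders rather than names from this import:

    llvm::DISubprogram SP = DBuilder.createFunction(
        FnScope, "foo", "foo", Unit,
        /*LineNo=*/10,           // line where the function name appears
        FnTy,
        /*isLocalToUnit=*/false,
        /*isDefinition=*/true,
        /*ScopeLine=*/12);       // line where the body's scope actually begins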
diff --git a/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index 30741c4970ab..b701b8fca5d4 100644 --- a/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/contrib/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -31,7 +31,7 @@ struct DOTGraphTraitsViewer : public FunctionPass { std::string Title, GraphName; Graph = &getAnalysis(); GraphName = DOTGraphTraits::getGraphName(Graph); - Title = GraphName + " for '" + F.getNameStr() + "' function"; + Title = GraphName + " for '" + F.getName().str() + "' function"; ViewGraph(Graph, Name, Simple, Title); return false; @@ -55,7 +55,7 @@ struct DOTGraphTraitsPrinter : public FunctionPass { virtual bool runOnFunction(Function &F) { Analysis *Graph; - std::string Filename = Name + "." + F.getNameStr() + ".dot"; + std::string Filename = Name + "." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -64,7 +64,7 @@ struct DOTGraphTraitsPrinter : public FunctionPass { std::string Title, GraphName; GraphName = DOTGraphTraits::getGraphName(Graph); - Title = GraphName + " for '" + F.getNameStr() + "' function"; + Title = GraphName + " for '" + F.getName().str() + "' function"; if (ErrorInfo.empty()) WriteGraph(File, Graph, Simple, Title); diff --git a/contrib/llvm/include/llvm/Analysis/DebugInfo.h b/contrib/llvm/include/llvm/Analysis/DebugInfo.h index 9a53c4dadba0..894c5428b988 100644 --- a/contrib/llvm/include/llvm/Analysis/DebugInfo.h +++ b/contrib/llvm/include/llvm/Analysis/DebugInfo.h @@ -43,6 +43,7 @@ namespace llvm { class DILexicalBlockFile; class DIVariable; class DIType; + class DIObjCProperty; /// DIDescriptor - A thin wraper around MDNode to access encoded debug info. /// This should not be stored in a container, because underly MDNode may @@ -128,6 +129,7 @@ namespace llvm { bool isUnspecifiedParameter() const; bool isTemplateTypeParameter() const; bool isTemplateValueParameter() const; + bool isObjCProperty() const; }; /// DISubrange - This is used to represent ranges, for array bounds. @@ -135,8 +137,8 @@ namespace llvm { public: explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {} - int64_t getLo() const { return (int64_t)getUInt64Field(1); } - int64_t getHi() const { return (int64_t)getUInt64Field(2); } + uint64_t getLo() const { return getUInt64Field(1); } + uint64_t getHi() const { return getUInt64Field(2); } }; /// DIArray - This descriptor holds an array of descriptors. @@ -153,6 +155,7 @@ namespace llvm { /// DIScope - A base class for various scopes. class DIScope : public DIDescriptor { + virtual void anchor(); public: explicit DIScope(const MDNode *N = 0) : DIDescriptor (N) {} virtual ~DIScope() {} @@ -163,6 +166,7 @@ namespace llvm { /// DICompileUnit - A wrapper for a compile unit. class DICompileUnit : public DIScope { + virtual void anchor(); public: explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {} @@ -202,6 +206,7 @@ namespace llvm { /// DIFile - This is a wrapper for a file. class DIFile : public DIScope { + virtual void anchor(); public: explicit DIFile(const MDNode *N = 0) : DIScope(N) { if (DbgNode && !isFile()) @@ -230,7 +235,7 @@ namespace llvm { /// FIXME: Types should be factored much better so that CV qualifiers and /// others do not require a huge and empty descriptor full of zeros. class DIType : public DIScope { - public: + virtual void anchor(); protected: // This ctor is used when the Tag has already been validated by a derived // ctor. 
@@ -240,7 +245,6 @@ namespace llvm { /// Verify - Verify that a type descriptor is well formed. bool Verify() const; - public: explicit DIType(const MDNode *N); explicit DIType() {} virtual ~DIType() {} @@ -320,6 +324,7 @@ namespace llvm { /// DIBasicType - A basic type, like 'int' or 'float'. class DIBasicType : public DIType { + virtual void anchor(); public: explicit DIBasicType(const MDNode *N = 0) : DIType(N) {} @@ -338,6 +343,7 @@ namespace llvm { /// DIDerivedType - A simple derived type, like a const qualified type, /// a typedef, a pointer or reference, etc. class DIDerivedType : public DIType { + virtual void anchor(); protected: explicit DIDerivedType(const MDNode *N, bool, bool) : DIType(N, true, true) {} @@ -351,29 +357,45 @@ namespace llvm { /// return base type size. uint64_t getOriginalTypeSize() const; - StringRef getObjCPropertyName() const { return getStringField(10); } + /// getObjCProperty - Return property node, if this ivar is + /// associated with one. + MDNode *getObjCProperty() const; + + StringRef getObjCPropertyName() const { + if (getVersion() > LLVMDebugVersion11) + return StringRef(); + return getStringField(10); + } StringRef getObjCPropertyGetterName() const { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return getStringField(11); } StringRef getObjCPropertySetterName() const { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return getStringField(12); } bool isReadOnlyObjCProperty() { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readonly) != 0; } bool isReadWriteObjCProperty() { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0; } bool isAssignObjCProperty() { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_assign) != 0; } bool isRetainObjCProperty() { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_retain) != 0; } bool isCopyObjCProperty() { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_copy) != 0; } bool isNonAtomicObjCProperty() { + assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request"); return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0; } @@ -391,6 +413,7 @@ namespace llvm { /// other types, like a function or struct. /// FIXME: Why is this a DIDerivedType?? class DICompositeType : public DIDerivedType { + virtual void anchor(); public: explicit DICompositeType(const MDNode *N = 0) : DIDerivedType(N, true, true) { @@ -454,6 +477,7 @@ namespace llvm { /// DISubprogram - This is a wrapper for a subprogram (e.g. a function). class DISubprogram : public DIScope { + virtual void anchor(); public: explicit DISubprogram(const MDNode *N = 0) : DIScope(N) {} @@ -495,6 +519,7 @@ namespace llvm { DICompositeType getContainingType() const { return getFieldAs(13); } + unsigned isArtificial() const { if (getVersion() <= llvm::LLVMDebugVersion8) return getUnsignedField(14); @@ -543,6 +568,11 @@ namespace llvm { return getFieldAs(6).getDirectory(); } + /// getScopeLineNumber - Get the beginning of the scope of the + /// function, not necessarily where the name of the program + /// starts. 
+ unsigned getScopeLineNumber() const { return getUnsignedField(20); } + /// Verify - Verify that a subprogram descriptor is well formed. bool Verify() const; @@ -621,7 +651,7 @@ namespace llvm { DIScope getContext() const { return getFieldAs(1); } StringRef getName() const { return getStringField(2); } - DICompileUnit getCompileUnit() const{ + DICompileUnit getCompileUnit() const { assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!"); if (getVersion() == llvm::LLVMDebugVersion7) return getFieldAs(3); @@ -687,6 +717,7 @@ namespace llvm { /// DILexicalBlock - This is a wrapper for a lexical block. class DILexicalBlock : public DIScope { + virtual void anchor(); public: explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {} DIScope getContext() const { return getFieldAs(1); } @@ -705,6 +736,7 @@ namespace llvm { /// DILexicalBlockFile - This is a wrapper for a lexical block with /// a filename change. class DILexicalBlockFile : public DIScope { + virtual void anchor(); public: explicit DILexicalBlockFile(const MDNode *N = 0) : DIScope(N) {} DIScope getContext() const { return getScope().getContext(); } @@ -724,6 +756,7 @@ namespace llvm { /// DINameSpace - A wrapper for a C++ style name space. class DINameSpace : public DIScope { + virtual void anchor(); public: explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {} DIScope getContext() const { return getFieldAs(1); } @@ -760,6 +793,51 @@ namespace llvm { bool Verify() const; }; + class DIObjCProperty : public DIDescriptor { + public: + explicit DIObjCProperty(const MDNode *N) : DIDescriptor(N) { } + + StringRef getObjCPropertyName() const { return getStringField(1); } + DIFile getFile() const { return getFieldAs(2); } + unsigned getLineNumber() const { return getUnsignedField(3); } + + StringRef getObjCPropertyGetterName() const { + return getStringField(4); + } + StringRef getObjCPropertySetterName() const { + return getStringField(5); + } + bool isReadOnlyObjCProperty() { + return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_readonly) != 0; + } + bool isReadWriteObjCProperty() { + return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0; + } + bool isAssignObjCProperty() { + return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_assign) != 0; + } + bool isRetainObjCProperty() { + return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_retain) != 0; + } + bool isCopyObjCProperty() { + return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_copy) != 0; + } + bool isNonAtomicObjCProperty() { + return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0; + } + + DIType getType() const { return getFieldAs(7); } + + /// Verify - Verify that a derived type descriptor is well formed. + bool Verify() const; + + /// print - print derived type. + void print(raw_ostream &OS) const; + + /// dump - print derived type to dbgs() with a newline. + void dump() const; + }; + /// getDISubprogram - Find subprogram that is enclosing this scope. DISubprogram getDISubprogram(const MDNode *Scope); @@ -816,7 +894,7 @@ namespace llvm { /// addGlobalVariable - Add global variable into GVs. bool addGlobalVariable(DIGlobalVariable DIG); - // addSubprogram - Add subprgoram into SPs. + // addSubprogram - Add subprogram into SPs. bool addSubprogram(DISubprogram SP); /// addType - Add type into Tys. 
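As a small consumer-side sketch of the DebugInfo.h additions above (dumpSubprogramLines is a hypothetical helper, not part of this import):

    #include "llvm/Analysis/DebugInfo.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpSubprogramLines(const llvm::MDNode *N) {
      llvm::DISubprogram SP(N);
      // The declaration line and the scope start line may now differ.
      llvm::errs() << SP.getName() << ": declared at line " << SP.getLineNumber()
                   << ", scope starts at line " << SP.getScopeLineNumber() << "\n";
    }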
diff --git a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h index d7f74af1c65c..a2e0675e92b7 100644 --- a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -154,6 +154,7 @@ class DominanceFrontierBase : public FunctionPass { /// used to compute a forward dominator frontiers. /// class DominanceFrontier : public DominanceFrontierBase { + virtual void anchor(); public: static char ID; // Pass ID, replacement for typeid DominanceFrontier() : diff --git a/contrib/llvm/include/llvm/Analysis/DominatorInternals.h b/contrib/llvm/include/llvm/Analysis/DominatorInternals.h index ae552b05abff..0c29236dde96 100644 --- a/contrib/llvm/include/llvm/Analysis/DominatorInternals.h +++ b/contrib/llvm/include/llvm/Analysis/DominatorInternals.h @@ -171,7 +171,7 @@ void Calculate(DominatorTreeBase::NodeType>& DT, // it might be that some blocks did not get a DFS number (e.g., blocks of // infinite loops). In these cases an artificial exit node is required. - MultipleRoots |= (DT.isPostDominator() && N != F.size()); + MultipleRoots |= (DT.isPostDominator() && N != GraphTraits::size(&F)); // When naively implemented, the Lengauer-Tarjan algorithm requires a separate // bucket for each vertex. However, this is unnecessary, because each vertex diff --git a/contrib/llvm/include/llvm/Analysis/Dominators.h b/contrib/llvm/include/llvm/Analysis/Dominators.h index 230e83d30121..6e8e4246367e 100644 --- a/contrib/llvm/include/llvm/Analysis/Dominators.h +++ b/contrib/llvm/include/llvm/Analysis/Dominators.h @@ -185,6 +185,18 @@ void Calculate(DominatorTreeBase::NodeType>& DT, template class DominatorTreeBase : public DominatorBase { + bool dominatedBySlowTreeWalk(const DomTreeNodeBase *A, + const DomTreeNodeBase *B) const { + assert(A != B); + assert(isReachableFromEntry(B)); + assert(isReachableFromEntry(A)); + + const DomTreeNodeBase *IDom; + while ((IDom = B->getIDom()) != 0 && IDom != A && IDom != B) + B = IDom; // Walk up the tree + return IDom != 0; + } + protected: typedef DenseMap*> DomTreeNodeMapType; DomTreeNodeMapType DomTreeNodes; @@ -321,8 +333,7 @@ class DominatorTreeBase : public DominatorBase { /// block. This is the same as using operator[] on this class. /// inline DomTreeNodeBase *getNode(NodeT *BB) const { - typename DomTreeNodeMapType::const_iterator I = DomTreeNodes.find(BB); - return I != DomTreeNodes.end() ? I->second : 0; + return DomTreeNodes.lookup(BB); } /// getRootNode - This returns the entry node for the CFG of the function. If @@ -339,38 +350,26 @@ class DominatorTreeBase : public DominatorBase { /// Note that this is not a constant time operation! /// bool properlyDominates(const DomTreeNodeBase *A, - const DomTreeNodeBase *B) const { - if (A == 0 || B == 0) return false; - return dominatedBySlowTreeWalk(A, B); - } - - inline bool properlyDominates(const NodeT *A, const NodeT *B) { + const DomTreeNodeBase *B) { + if (A == 0 || B == 0) + return false; if (A == B) return false; - - // Cast away the const qualifiers here. This is ok since - // this function doesn't actually return the values returned - // from getNode. 
- return properlyDominates(getNode(const_cast(A)), - getNode(const_cast(B))); - } - - bool dominatedBySlowTreeWalk(const DomTreeNodeBase *A, - const DomTreeNodeBase *B) const { - const DomTreeNodeBase *IDom; - if (A == 0 || B == 0) return false; - while ((IDom = B->getIDom()) != 0 && IDom != A && IDom != B) - B = IDom; // Walk up the tree - return IDom != 0; + return dominates(A, B); } + bool properlyDominates(const NodeT *A, const NodeT *B); /// isReachableFromEntry - Return true if A is dominated by the entry /// block of the function containing it. - bool isReachableFromEntry(const NodeT* A) { + bool isReachableFromEntry(const NodeT* A) const { assert(!this->isPostDominator() && "This is not implemented for post dominators"); - return dominates(&A->getParent()->front(), A); + return isReachableFromEntry(getNode(const_cast(A))); + } + + inline bool isReachableFromEntry(const DomTreeNodeBase *A) const { + return A; } /// dominates - Returns true iff A dominates B. Note that this is not a @@ -378,10 +377,16 @@ class DominatorTreeBase : public DominatorBase { /// inline bool dominates(const DomTreeNodeBase *A, const DomTreeNodeBase *B) { + // A node trivially dominates itself. if (B == A) - return true; // A node trivially dominates itself. + return true; - if (A == 0 || B == 0) + // An unreachable node is dominated by anything. + if (!isReachableFromEntry(B)) + return true; + + // And dominates nothing. + if (!isReachableFromEntry(A)) return false; // Compare the result of the tree walk and the dfs numbers, if expensive @@ -406,16 +411,7 @@ class DominatorTreeBase : public DominatorBase { return dominatedBySlowTreeWalk(A, B); } - inline bool dominates(const NodeT *A, const NodeT *B) { - if (A == B) - return true; - - // Cast away the const qualifiers here. This is ok since - // this function doesn't actually return the values returned - // from getNode. - return dominates(getNode(const_cast(A)), - getNode(const_cast(B))); - } + bool dominates(const NodeT *A, const NodeT *B); NodeT *getRoot() const { assert(this->Roots.size() == 1 && "Should always have entry node!"); @@ -623,9 +619,8 @@ class DominatorTreeBase : public DominatorBase { } DomTreeNodeBase *getNodeForBlock(NodeT *BB) { - typename DomTreeNodeMapType::iterator I = this->DomTreeNodes.find(BB); - if (I != this->DomTreeNodes.end() && I->second) - return I->second; + if (DomTreeNodeBase *Node = getNode(BB)) + return Node; // Haven't calculated this node yet? Get or calculate the node for the // immediate dominator. @@ -641,8 +636,7 @@ class DominatorTreeBase : public DominatorBase { } inline NodeT *getIDom(NodeT *BB) const { - typename DenseMap::const_iterator I = IDoms.find(BB); - return I != IDoms.end() ? 
I->second : 0; + return IDoms.lookup(BB); } inline void addRoot(NodeT* BB) { @@ -653,21 +647,24 @@ class DominatorTreeBase : public DominatorBase { /// recalculate - compute a dominator tree for the given function template void recalculate(FT& F) { + typedef GraphTraits TraitsTy; reset(); this->Vertex.push_back(0); if (!this->IsPostDominators) { // Initialize root - this->Roots.push_back(&F.front()); - this->IDoms[&F.front()] = 0; - this->DomTreeNodes[&F.front()] = 0; + NodeT *entry = TraitsTy::getEntryNode(&F); + this->Roots.push_back(entry); + this->IDoms[entry] = 0; + this->DomTreeNodes[entry] = 0; Calculate(*this, F); } else { // Initialize the roots list - for (typename FT::iterator I = F.begin(), E = F.end(); I != E; ++I) { - if (std::distance(GraphTraits::child_begin(I), - GraphTraits::child_end(I)) == 0) + for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F), + E = TraitsTy::nodes_end(&F); I != E; ++I) { + if (std::distance(TraitsTy::child_begin(I), + TraitsTy::child_end(I)) == 0) addRoot(I); // Prepopulate maps so that we don't get iterator invalidation issues later. @@ -680,6 +677,32 @@ class DominatorTreeBase : public DominatorBase { } }; +// These two functions are declared out of line as a workaround for building +// with old (< r147295) versions of clang because of pr11642. +template +bool DominatorTreeBase::dominates(const NodeT *A, const NodeT *B) { + if (A == B) + return true; + + // Cast away the const qualifiers here. This is ok since + // this function doesn't actually return the values returned + // from getNode. + return dominates(getNode(const_cast(A)), + getNode(const_cast(B))); +} +template +bool +DominatorTreeBase::properlyDominates(const NodeT *A, const NodeT *B) { + if (A == B) + return false; + + // Cast away the const qualifiers here. This is ok since + // this function doesn't actually return the values returned + // from getNode. + return dominates(getNode(const_cast(A)), + getNode(const_cast(B))); +} + EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase); //===------------------------------------- @@ -749,9 +772,12 @@ class DominatorTree : public FunctionPass { return DT->dominates(A, B); } - // dominates - Return true if A dominates B. This performs the - // special checks necessary if A and B are in the same basic block. - bool dominates(const Instruction *A, const Instruction *B) const; + // dominates - Return true if Def dominates a use in User. This performs + // the special checks necessary if Def and User are in the same basic block. + // Note that Def doesn't dominate a use in Def itself! 
+ bool dominates(const Instruction *Def, const Use &U) const; + bool dominates(const Instruction *Def, const Instruction *User) const; + bool dominates(const Instruction *Def, const BasicBlock *BB) const; bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const { return DT->properlyDominates(A, B); @@ -814,10 +840,12 @@ class DominatorTree : public FunctionPass { DT->splitBlock(NewBB); } - bool isReachableFromEntry(const BasicBlock* A) { + bool isReachableFromEntry(const BasicBlock* A) const { return DT->isReachableFromEntry(A); } + bool isReachableFromEntry(const Use &U) const; + virtual void releaseMemory() { DT->releaseMemory(); diff --git a/contrib/llvm/include/llvm/Analysis/IVUsers.h b/contrib/llvm/include/llvm/Analysis/IVUsers.h index 2fb607cc5c37..2bf79b9c932b 100644 --- a/contrib/llvm/include/llvm/Analysis/IVUsers.h +++ b/contrib/llvm/include/llvm/Analysis/IVUsers.h @@ -166,10 +166,16 @@ class IVUsers : public LoopPass { const_iterator end() const { return IVUses.end(); } bool empty() const { return IVUses.empty(); } + bool isIVUserOrOperand(Instruction *Inst) const { + return Processed.count(Inst); + } + void print(raw_ostream &OS, const Module* = 0) const; /// dump - This method is used for debugging. void dump() const; +protected: + bool AddUsersImpl(Instruction *I, SmallPtrSet &SimpleLoopNests); }; Pass *createIVUsersPass(); diff --git a/contrib/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm/include/llvm/Analysis/InlineCost.h index 36a16e68df07..691c2d19be9a 100644 --- a/contrib/llvm/include/llvm/Analysis/InlineCost.h +++ b/contrib/llvm/include/llvm/Analysis/InlineCost.h @@ -14,171 +14,118 @@ #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H +#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/Analysis/CodeMetrics.h" #include #include #include -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ValueMap.h" -#include "llvm/Analysis/CodeMetrics.h" namespace llvm { - class Value; - class Function; - class BasicBlock; class CallSite; - template - class SmallPtrSet; class TargetData; namespace InlineConstants { // Various magic constants used to adjust heuristics. const int InstrCost = 5; - const int IndirectCallBonus = -100; + const int IndirectCallThreshold = 100; const int CallPenalty = 25; const int LastCallToStaticBonus = -15000; const int ColdccPenalty = 2000; const int NoreturnPenalty = 10000; } - /// InlineCost - Represent the cost of inlining a function. This - /// supports special values for functions which should "always" or - /// "never" be inlined. Otherwise, the cost represents a unitless - /// amount; smaller values increase the likelihood of the function - /// being inlined. + /// \brief Represents the cost of inlining a function. + /// + /// This supports special values for functions which should "always" or + /// "never" be inlined. Otherwise, the cost represents a unitless amount; + /// smaller values increase the likelihood of the function being inlined. + /// + /// Objects of this type also provide the adjusted threshold for inlining + /// based on the information available for a particular callsite. They can be + /// directly tested to determine if inlining should occur given the cost and + /// threshold for this cost metric. 
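As the comment above describes, the rewritten InlineCost carries both the estimated cost and the threshold it was computed against, and converts to bool when inlining is profitable. A short sketch of the intended call pattern; the analyzer object CA, the call site CS and the Threshold value are illustrative placeholders for whatever the caller already has.

    // Sketch only: CA is an InlineCostAnalyzer, CS a CallSite, Threshold an int.
    llvm::InlineCost IC = CA.getInlineCost(CS, Threshold);
    if (IC.isAlways()) {
      // e.g. always_inline callees: inline unconditionally.
    } else if (IC.isNever()) {
      // e.g. callees that must not be inlined.
    } else if (IC) {                    // operator bool(): Cost < Threshold
      int Slack = IC.getCostDelta();    // Threshold - Cost; positive here
      (void)Slack;                      // inline, possibly ranking by Slack
    }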
class InlineCost { - enum Kind { - Value, - Always, - Never + enum SentinelValues { + AlwaysInlineCost = INT_MIN, + NeverInlineCost = INT_MAX }; - // This is a do-it-yourself implementation of - // int Cost : 30; - // unsigned Type : 2; - // We used to use bitfields, but they were sometimes miscompiled (PR3822). - enum { TYPE_BITS = 2 }; - enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS }; - unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS; + /// \brief The estimated cost of inlining this callsite. + const int Cost; - Kind getType() const { - return Kind(TypedCost >> COST_BITS); - } + /// \brief The adjusted threshold against which this cost was computed. + const int Threshold; - int getCost() const { - // Sign-extend the bottom COST_BITS bits. - return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS; - } + // Trivial constructor, interesting logic in the factory functions below. + InlineCost(int Cost, int Threshold) + : Cost(Cost), Threshold(Threshold) {} - InlineCost(int C, int T) { - TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS); - assert(getCost() == C && "Cost exceeds InlineCost precision"); - } public: - static InlineCost get(int Cost) { return InlineCost(Cost, Value); } - static InlineCost getAlways() { return InlineCost(0, Always); } - static InlineCost getNever() { return InlineCost(0, Never); } - - bool isVariable() const { return getType() == Value; } - bool isAlways() const { return getType() == Always; } - bool isNever() const { return getType() == Never; } - - /// getValue() - Return a "variable" inline cost's amount. It is - /// an error to call this on an "always" or "never" InlineCost. - int getValue() const { - assert(getType() == Value && "Invalid access of InlineCost"); - return getCost(); + static InlineCost get(int Cost, int Threshold) { + assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value"); + assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); + return InlineCost(Cost, Threshold); } + static InlineCost getAlways() { + return InlineCost(AlwaysInlineCost, 0); + } + static InlineCost getNever() { + return InlineCost(NeverInlineCost, 0); + } + + /// \brief Test whether the inline cost is low enough for inlining. + operator bool() const { + return Cost < Threshold; + } + + bool isAlways() const { return Cost == AlwaysInlineCost; } + bool isNever() const { return Cost == NeverInlineCost; } + bool isVariable() const { return !isAlways() && !isNever(); } + + /// \brief Get the inline cost estimate. + /// It is an error to call this on an "always" or "never" InlineCost. + int getCost() const { + assert(isVariable() && "Invalid access of InlineCost"); + return Cost; + } + + /// \brief Get the cost delta from the threshold for inlining. + /// Only valid if the cost is of the variable kind. Returns a negative + /// value if the cost is too high to inline. + int getCostDelta() const { return Threshold - getCost(); } }; /// InlineCostAnalyzer - Cost analyzer used by inliner. class InlineCostAnalyzer { - struct ArgInfo { - public: - unsigned ConstantWeight; - unsigned AllocaWeight; - - ArgInfo(unsigned CWeight, unsigned AWeight) - : ConstantWeight(CWeight), AllocaWeight(AWeight) - {} - }; - - struct FunctionInfo { - CodeMetrics Metrics; - - /// ArgumentWeights - Each formal argument of the function is inspected to - /// see if it is used in any contexts where making it a constant or alloca - /// would reduce the code size. If so, we add some value to the argument - /// entry here. 
- std::vector ArgumentWeights; - - /// analyzeFunction - Add information about the specified function - /// to the current structure. - void analyzeFunction(Function *F, const TargetData *TD); - - /// NeverInline - Returns true if the function should never be - /// inlined into any caller. - bool NeverInline(); - }; - - // The Function* for a function can be changed (by ArgumentPromotion); - // the ValueMap will update itself when this happens. - ValueMap CachedFunctionInfo; - // TargetData if available, or null. const TargetData *TD; - int CountBonusForConstant(Value *V, Constant *C = NULL); - int ConstantFunctionBonus(CallSite CS, Constant *C); - int getInlineSize(CallSite CS, Function *Callee); - int getInlineBonuses(CallSite CS, Function *Callee); public: InlineCostAnalyzer(): TD(0) {} void setTargetData(const TargetData *TData) { TD = TData; } - /// getInlineCost - The heuristic used to determine if we should inline the - /// function call or not. + /// \brief Get an InlineCost object representing the cost of inlining this + /// callsite. /// - InlineCost getInlineCost(CallSite CS, - SmallPtrSet &NeverInline); + /// Note that threshold is passed into this function. Only costs below the + /// threshold are computed with any accuracy. The threshold can be used to + /// bound the computation necessary to determine whether the cost is + /// sufficiently low to warrant inlining. + InlineCost getInlineCost(CallSite CS, int Threshold); /// getCalledFunction - The heuristic used to determine if we should inline /// the function call or not. The callee is explicitly specified, to allow - /// you to calculate the cost of inlining a function via a pointer. The - /// result assumes that the inlined version will always be used. You should - /// weight it yourself in cases where this callee will not always be called. - InlineCost getInlineCost(CallSite CS, - Function *Callee, - SmallPtrSet &NeverInline); - - /// getSpecializationBonus - The heuristic used to determine the per-call - /// performance boost for using a specialization of Callee with argument - /// SpecializedArgNos replaced by a constant. - int getSpecializationBonus(Function *Callee, - SmallVectorImpl &SpecializedArgNo); - - /// getSpecializationCost - The heuristic used to determine the code-size - /// impact of creating a specialized version of Callee with argument - /// SpecializedArgNo replaced by a constant. - InlineCost getSpecializationCost(Function *Callee, - SmallVectorImpl &SpecializedArgNo); - - /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a - /// higher threshold to determine if the function call should be inlined. - float getInlineFudgeFactor(CallSite CS); - - /// resetCachedFunctionInfo - erase any cached cost info for this function. - void resetCachedCostInfo(Function* Caller) { - CachedFunctionInfo[Caller] = FunctionInfo(); - } - - /// growCachedCostInfo - update the cached cost info for Caller after Callee - /// has been inlined. If Callee is NULL it means a dead call has been - /// eliminated. - void growCachedCostInfo(Function* Caller, Function* Callee); - - /// clear - empty the cache of inline costs - void clear(); + /// you to calculate the cost of inlining a function via a pointer. This + /// behaves exactly as the version with no explicit callee parameter in all + /// other respects. + // + // Note: This is used by out-of-tree passes, please do not remove without + // adding a replacement API. 
+ InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold); }; /// callIsSmall - If a call is likely to lower to a single target instruction, diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h index c1d87d3f7712..152e885bf667 100644 --- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -20,147 +20,198 @@ #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H namespace llvm { - class DominatorTree; - class Instruction; - class Value; - class TargetData; template class ArrayRef; + class DominatorTree; + class Instruction; + class TargetData; + class TargetLibraryInfo; + class Type; + class Value; /// SimplifyAddInst - Given operands for an Add, see if we can /// fold the result. If not, this returns null. Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const TargetData *TD = 0, const DominatorTree *DT = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifySubInst - Given operands for a Sub, see if we can /// fold the result. If not, this returns null. Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const TargetData *TD = 0, const DominatorTree *DT = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifySDivInst - Given operands for an SDiv, see if we can /// fold the result. If not, this returns null. Value *SimplifySDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyUDivInst - Given operands for a UDiv, see if we can /// fold the result. If not, this returns null. - Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyFDivInst - Given operands for an FDiv, see if we can /// fold the result. If not, this returns null. Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifySRemInst - Given operands for an SRem, see if we can /// fold the result. If not, this returns null. - Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyURemInst - Given operands for a URem, see if we can /// fold the result. If not, this returns null. Value *SimplifyURemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyFRemInst - Given operands for an FRem, see if we can /// fold the result. If not, this returns null. Value *SimplifyFRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyShlInst - Given operands for a Shl, see if we can /// fold the result. If not, this returns null. 
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD = 0, const DominatorTree *DT = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifyLShrInst - Given operands for a LShr, see if we can /// fold the result. If not, this returns null. Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD = 0, const DominatorTree *DT=0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifyAShrInst - Given operands for a AShr, see if we can /// fold the result. If not, this returns null. Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. Value *SimplifyAndInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. Value *SimplifyOrInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyXorInst - Given operands for a Xor, see if we can /// fold the result. If not, this returns null. Value *SimplifyXorInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD = 0, + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD = 0, + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. - Value *SimplifyGEPInst(ArrayRef Ops, - const TargetData *TD = 0, const DominatorTree *DT = 0); + Value *SimplifyGEPInst(ArrayRef Ops, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we /// can fold the result. If not, this returns null. Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); + /// SimplifyTruncInst - Given operands for an TruncInst, see if we can fold + /// the result. If not, this returns null. + Value *SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + //=== Helper functions for higher up the class hierarchy. /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. If not, this returns null. 
Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD = 0, const DominatorTree *DT = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD = 0, const DominatorTree *DT = 0); + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, const DominatorTree *DT = 0); - /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then - /// delete the From instruction. In addition to a basic RAUW, this does a - /// recursive simplification of the updated instructions. This catches - /// things where one simplification exposes other opportunities. This only - /// simplifies and deletes scalar operations, it does not change the CFG. + /// \brief Replace all uses of 'I' with 'SimpleV' and simplify the uses + /// recursively. /// - void ReplaceAndSimplifyAllUses(Instruction *From, Value *To, - const TargetData *TD = 0, - const DominatorTree *DT = 0); + /// This first performs a normal RAUW of I with SimpleV. It then recursively + /// attempts to simplify those users updated by the operation. The 'I' + /// instruction must not be equal to the simplified value 'SimpleV'. + /// + /// The function returns true if any simplifications were performed. + bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); + + /// \brief Recursively attempt to simplify an instruction. + /// + /// This routine uses SimplifyInstruction to simplify 'I', and if successful + /// replaces uses of 'I' with the simplified value. It then recurses on each + /// of the users impacted. It returns true if any simplifications were + /// performed. 
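The two entry points documented above supersede ReplaceAndSimplifyAllUses. A minimal sketch of driving the replacement variant from a transform; I, TD, TLI and DT are placeholders for state the caller already holds.

    // Sketch only: TD, TLI and DT may each be null, matching the defaults above.
    if (llvm::Value *SimpleV = llvm::SimplifyInstruction(I, TD, TLI, DT)) {
      // Contract from the comment above: I must not equal SimpleV;
      // SimplifyInstruction does not return I itself, so this holds.
      bool Changed = llvm::replaceAndRecursivelySimplify(I, SimpleV, TD, TLI, DT);
      (void)Changed;
    }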
+ bool recursivelySimplifyInstruction(Instruction *I, + const TargetData *TD = 0, + const TargetLibraryInfo *TLI = 0, + const DominatorTree *DT = 0); } // end namespace llvm #endif diff --git a/contrib/llvm/include/llvm/Analysis/IntervalIterator.h b/contrib/llvm/include/llvm/Analysis/IntervalIterator.h index 82b3294cc504..0968c7468e68 100644 --- a/contrib/llvm/include/llvm/Analysis/IntervalIterator.h +++ b/contrib/llvm/include/llvm/Analysis/IntervalIterator.h @@ -101,14 +101,14 @@ class IntervalIterator { IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) { OrigContainer = M; if (!ProcessInterval(&M->front())) { - assert(0 && "ProcessInterval should never fail for first interval!"); + llvm_unreachable("ProcessInterval should never fail for first interval!"); } } IntervalIterator(IntervalPartition &IP, bool OwnMemory) : IOwnMem(OwnMemory) { OrigContainer = &IP; if (!ProcessInterval(IP.getRootInterval())) { - assert(0 && "ProcessInterval should never fail for first interval!"); + llvm_unreachable("ProcessInterval should never fail for first interval!"); } } diff --git a/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h b/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h index fc4d0af920e9..065c230fb2fd 100644 --- a/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -20,12 +20,14 @@ namespace llvm { class Constant; class TargetData; + class TargetLibraryInfo; class Value; /// LazyValueInfo - This pass computes, caches, and vends lazy value constraint /// information. class LazyValueInfo : public FunctionPass { class TargetData *TD; + class TargetLibraryInfo *TLI; void *PImpl; LazyValueInfo(const LazyValueInfo&); // DO NOT IMPLEMENT. void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT. @@ -68,9 +70,7 @@ class LazyValueInfo : public FunctionPass { // Implementation boilerplate. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } + virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual void releaseMemory(); virtual bool runOnFunction(Function &F); }; diff --git a/contrib/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm/include/llvm/Analysis/Loads.h index 1574262dd6d3..5f0aefbeb015 100644 --- a/contrib/llvm/include/llvm/Analysis/Loads.h +++ b/contrib/llvm/include/llvm/Analysis/Loads.h @@ -20,6 +20,7 @@ namespace llvm { class AliasAnalysis; class TargetData; +class MDNode; /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the @@ -41,10 +42,15 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// MaxInstsToScan specifies the maximum instructions to scan in the block. /// If it is set to 0, it will scan the whole block. You can also optionally /// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. 
Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = 6, - AliasAnalysis *AA = 0); + AliasAnalysis *AA = 0, + MDNode **TBAATag = 0); } diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h index 12cb6c5cc480..91feaaac038d 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h +++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h @@ -23,7 +23,6 @@ // * whether or not a particular block branches out of the loop // * the successor blocks of the loop // * the loop depth -// * the trip count // * etc... // //===----------------------------------------------------------------------===// @@ -416,14 +415,26 @@ class LoopBase { #ifndef NDEBUG assert(!Blocks.empty() && "Loop header is missing"); + // Setup for using a depth-first iterator to visit every block in the loop. + SmallVector ExitBBs; + getExitBlocks(ExitBBs); + llvm::SmallPtrSet VisitSet; + VisitSet.insert(ExitBBs.begin(), ExitBBs.end()); + df_ext_iterator > + BI = df_ext_begin(getHeader(), VisitSet), + BE = df_ext_end(getHeader(), VisitSet); + + // Keep track of the number of BBs visited. + unsigned NumVisited = 0; + // Sort the blocks vector so that we can use binary search to do quick // lookups. SmallVector LoopBBs(block_begin(), block_end()); std::sort(LoopBBs.begin(), LoopBBs.end()); // Check the individual blocks. - for (block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - BlockT *BB = *I; + for ( ; BI != BE; ++BI) { + BlockT *BB = *BI; bool HasInsideLoopSuccs = false; bool HasInsideLoopPreds = false; SmallVector OutsideLoopPreds; @@ -440,7 +451,7 @@ class LoopBase { for (typename InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); PI != PE; ++PI) { - typename InvBlockTraits::NodeType *N = *PI; + BlockT *N = *PI; if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), N)) HasInsideLoopPreds = true; else @@ -464,8 +475,12 @@ class LoopBase { assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); assert(BB != getHeader()->getParent()->begin() && "Loop contains function entry block!"); + + NumVisited++; } + assert(NumVisited == getNumBlocks() && "Unreachable block in loop"); + // Check the subloops. for (iterator I = begin(), E = end(); I != E; ++I) // Each block in each subloop should be contained within this loop. @@ -571,37 +586,6 @@ class Loop : public LoopBase { /// PHINode *getCanonicalInductionVariable() const; - /// getTripCount - Return a loop-invariant LLVM value indicating the number of - /// times the loop will be executed. Note that this means that the backedge - /// of the loop executes N-1 times. If the trip-count cannot be determined, - /// this returns null. - /// - /// The IndVarSimplify pass transforms loops to have a form that this - /// function easily understands. - /// - Value *getTripCount() const; - - /// getSmallConstantTripCount - Returns the trip count of this loop as a - /// normal unsigned value, if possible. Returns 0 if the trip count is unknown - /// of not constant. Will also return 0 if the trip count is very large - /// (>= 2^32) - /// - /// The IndVarSimplify pass transforms loops to have a form that this - /// function easily understands. - /// - unsigned getSmallConstantTripCount() const; - - /// getSmallConstantTripMultiple - Returns the largest constant divisor of the - /// trip count of this loop as a normal unsigned value, if possible. 
This - /// means that the actual trip count is always a multiple of the returned - /// value (don't forget the trip count could very well be zero as well!). - /// - /// Returns 1 if the trip count is unknown or not guaranteed to be the - /// multiple of a constant (which is also the case if the trip count is simply - /// constant, use getSmallConstantTripCount for that case), Will also return 1 - /// if the trip count is very large (>= 2^32). - unsigned getSmallConstantTripMultiple() const; - /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; @@ -610,6 +594,9 @@ class Loop : public LoopBase { /// normal form. bool isLoopSimplifyForm() const; + /// isSafeToClone - Return true if the loop body is safe to clone in practice. + bool isSafeToClone() const; + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool hasDedicatedExits() const; @@ -671,9 +658,7 @@ class LoopInfoBase { /// block is in no loop (for example the entry node), null is returned. /// LoopT *getLoopFor(const BlockT *BB) const { - typename DenseMap::const_iterator I= - BBMap.find(const_cast(BB)); - return I != BBMap.end() ? I->second : 0; + return BBMap.lookup(const_cast(BB)); } /// operator[] - same as getLoopFor... @@ -712,9 +697,7 @@ class LoopInfoBase { /// the loop hierarchy tree. void changeLoopFor(BlockT *BB, LoopT *L) { if (!L) { - typename DenseMap::iterator I = BBMap.find(BB); - if (I != BBMap.end()) - BBMap.erase(I); + BBMap.erase(BB); return; } BBMap[BB] = L; @@ -771,7 +754,7 @@ class LoopInfoBase { } LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase &DT) { - if (BBMap.find(BB) != BBMap.end()) return 0;// Haven't processed this node? + if (BBMap.count(BB)) return 0; // Haven't processed this node? std::vector TodoStack; @@ -782,7 +765,8 @@ class LoopInfoBase { InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB); I != E; ++I) { typename InvBlockTraits::NodeType *N = *I; - if (DT.dominates(BB, N)) // If BB dominates its predecessor... + // If BB dominates its predecessor... + if (DT.dominates(BB, N) && DT.isReachableFromEntry(N)) TodoStack.push_back(N); } @@ -792,14 +776,12 @@ class LoopInfoBase { LoopT *L = new LoopT(BB); BBMap[BB] = L; - BlockT *EntryBlock = BB->getParent()->begin(); - while (!TodoStack.empty()) { // Process all the nodes in the loop BlockT *X = TodoStack.back(); TodoStack.pop_back(); if (!L->contains(X) && // As of yet unprocessed?? - DT.dominates(EntryBlock, X)) { // X is reachable from entry block? + DT.isReachableFromEntry(X)) { // Check to see if this block already belongs to a loop. If this occurs // then we have a case where a loop that is supposed to be a child of // the current loop was processed before the current loop. When this diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index e18d937f6916..68ce364f4413 100644 --- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -324,6 +324,7 @@ namespace llvm { /// Current AA implementation, just a cache. 
AliasAnalysis *AA; TargetData *TD; + DominatorTree *DT; OwningPtr PredCache; public: MemoryDependenceAnalysis(); @@ -430,6 +431,9 @@ namespace llvm { void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); + AliasAnalysis::ModRefResult + getModRefInfo(const Instruction *Inst, const AliasAnalysis::Location &Loc); + /// verifyRemoved - Verify that the specified instruction does not occur /// in our internal data structures. void verifyRemoved(Instruction *Inst) const; diff --git a/contrib/llvm/include/llvm/Analysis/PHITransAddr.h b/contrib/llvm/include/llvm/Analysis/PHITransAddr.h index 033efba3e742..ff9a24790a99 100644 --- a/contrib/llvm/include/llvm/Analysis/PHITransAddr.h +++ b/contrib/llvm/include/llvm/Analysis/PHITransAddr.h @@ -20,7 +20,8 @@ namespace llvm { class DominatorTree; class TargetData; - + class TargetLibraryInfo; + /// PHITransAddr - An address value which tracks and handles phi translation. /// As we walk "up" the CFG through predecessors, we need to ensure that the /// address we're tracking is kept up to date. For example, if we're analyzing @@ -37,11 +38,14 @@ class PHITransAddr { /// TD - The target data we are playing with if known, otherwise null. const TargetData *TD; + + /// TLI - The target library info if known, otherwise null. + const TargetLibraryInfo *TLI; /// InstInputs - The inputs for our symbolic address. SmallVector InstInputs; public: - PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td) { + PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td), TLI(0) { // If the address is an instruction, the whole thing is considered an input. if (Instruction *I = dyn_cast(Addr)) InstInputs.push_back(I); diff --git a/contrib/llvm/include/llvm/Analysis/ProfileInfo.h b/contrib/llvm/include/llvm/Analysis/ProfileInfo.h index 300a0279042c..6c2e2732d344 100644 --- a/contrib/llvm/include/llvm/Analysis/ProfileInfo.h +++ b/contrib/llvm/include/llvm/Analysis/ProfileInfo.h @@ -22,6 +22,7 @@ #define LLVM_ANALYSIS_PROFILEINFO_H #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -85,13 +86,11 @@ namespace llvm { // getFunction() - Returns the Function for an Edge, checking for validity. static const FType* getFunction(Edge e) { - if (e.first) { + if (e.first) return e.first->getParent(); - } else if (e.second) { + if (e.second) return e.second->getParent(); - } - assert(0 && "Invalid ProfileInfo::Edge"); - return (const FType*)0; + llvm_unreachable("Invalid ProfileInfo::Edge"); } // getEdge() - Creates an Edge from two BasicBlocks. 
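Several hunks above (IntervalIterator.h, ProfileInfo.h, ScalarEvolutionExpressions.h) replace "assert(0 && ...)" plus a dummy return with llvm_unreachable. A minimal sketch of the idiom, using a made-up function purely for illustration:

    #include "llvm/Support/ErrorHandling.h"

    // Hypothetical example: the old pattern needed an unreachable 'return' to
    // silence warnings in NDEBUG builds; llvm_unreachable both documents the
    // invariant and tells the compiler the path cannot fall through.
    static const char *edgeKindName(unsigned Kind) {
      switch (Kind) {
      case 0: return "intra";
      case 1: return "inter";
      }
      llvm_unreachable("Invalid edge kind");
    }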
diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfo.h b/contrib/llvm/include/llvm/Analysis/RegionInfo.h index 9d8954595d61..b098eeaa3db8 100644 --- a/contrib/llvm/include/llvm/Analysis/RegionInfo.h +++ b/contrib/llvm/include/llvm/Analysis/RegionInfo.h @@ -681,7 +681,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node) { if (Node.isSubRegion()) return OS << Node.getNodeAs()->getNameStr(); else - return OS << Node.getNodeAs()->getNameStr(); + return OS << Node.getNodeAs()->getName(); } } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h index 10d933e68f5b..72408f773840 100644 --- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -41,6 +41,7 @@ namespace llvm { class Type; class ScalarEvolution; class TargetData; + class TargetLibraryInfo; class LLVMContext; class Loop; class LoopInfo; @@ -118,6 +119,10 @@ namespace llvm { /// bool isAllOnesValue() const; + /// isNonConstantNegative - Return true if the specified scev is negated, + /// but not a constant. + bool isNonConstantNegative() const; + /// print - Print out the internal representation of this scalar to the /// specified stream. This should really only be used for debugging /// purposes. @@ -135,7 +140,7 @@ namespace llvm { ID = X.FastID; } static bool Equals(const SCEV &X, const FoldingSetNodeID &ID, - FoldingSetNodeID &TempID) { + unsigned IDHash, FoldingSetNodeID &TempID) { return ID == X.FastID; } static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) { @@ -224,6 +229,10 @@ namespace llvm { /// TargetData *TD; + /// TLI - The target library information for the target we are targeting. + /// + TargetLibraryInfo *TLI; + /// DT - The dominator tree. /// DominatorTree *DT; @@ -721,16 +730,21 @@ namespace llvm { const SCEV *LHS, const SCEV *RHS); /// getSmallConstantTripCount - Returns the maximum trip count of this loop - /// as a normal unsigned value, if possible. Returns 0 if the trip count is - /// unknown or not constant. - unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock); + /// as a normal unsigned value. Returns 0 if the trip count is unknown or + /// not constant. This "trip count" assumes that control exits via + /// ExitingBlock. More precisely, it is the number of times that control may + /// reach ExitingBlock before taking the branch. For loops with multiple + /// exits, it may not be the number times that the loop header executes if + /// the loop exits prematurely via another branch. + unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock); /// getSmallConstantTripMultiple - Returns the largest constant divisor of /// the trip count of this loop as a normal unsigned value, if /// possible. This means that the actual trip count is always a multiple of /// the returned value (don't forget the trip count could very well be zero - /// as well!). - unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock); + /// as well!). As explained in the comments for getSmallConstantTripCount, + /// this assumes that control exits the loop via ExitingBlock. + unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock); // getExitCount - Get the expression for the number of loop iterations for // which this loop is guaranteed not to exit via ExitingBlock. 
Otherwise diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h index a4ad1451d412..c22fc3ab74b7 100644 --- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -22,6 +22,8 @@ #include namespace llvm { + class TargetLowering; + /// SCEVExpander - This class uses information about analyze scalars to /// rewrite expressions in canonical form. /// @@ -58,6 +60,9 @@ namespace llvm { /// insert the IV increment at this position. Instruction *IVIncInsertPos; + /// Phis that complete an IV chain. Reuse + std::set > ChainedPhis; + /// CanonicalMode - When true, expressions are expanded in "canonical" /// form. In particular, addrecs are expanded as arithmetic based on /// a canonical induction variable. When false, expression are expanded @@ -100,6 +105,7 @@ namespace llvm { InsertedExpressions.clear(); InsertedValues.clear(); InsertedPostIncValues.clear(); + ChainedPhis.clear(); } /// getOrInsertCanonicalInductionVariable - This method returns the @@ -108,14 +114,18 @@ namespace llvm { /// starts at zero and steps by one on each iteration. PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty); - /// hoistStep - Utility for hoisting an IV increment. - static bool hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT); + /// getIVIncOperand - Return the induction variable increment's IV operand. + Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos, + bool allowScale); + + /// hoistIVInc - Utility for hoisting an IV increment. + bool hoistIVInc(Instruction *IncV, Instruction *InsertPos); /// replaceCongruentIVs - replace congruent phis with their most canonical /// representative. Return the number of phis eliminated. unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts, + const TargetLowering *TLI = NULL); /// expandCodeFor - Insert code to directly compute the specified SCEV /// expression into the program. The inserted code is inserted into the @@ -161,6 +171,16 @@ namespace llvm { void clearInsertPoint() { Builder.ClearInsertionPoint(); } + + /// isInsertedInstruction - Return true if the specified instruction was + /// inserted by the code rewriter. If so, the client should not modify the + /// instruction. + bool isInsertedInstruction(Instruction *I) const { + return InsertedValues.count(I) || InsertedPostIncValues.count(I); + } + + void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } + private: LLVMContext &getContext() const { return SE.getContext(); } @@ -195,13 +215,6 @@ namespace llvm { /// result will be expanded to have that type, with a cast if necessary. Value *expandCodeFor(const SCEV *SH, Type *Ty = 0); - /// isInsertedInstruction - Return true if the specified instruction was - /// inserted by the code rewriter. If so, the client should not modify the - /// instruction. - bool isInsertedInstruction(Instruction *I) const { - return InsertedValues.count(I) || InsertedPostIncValues.count(I); - } - /// getRelevantLoop - Determine the most "relevant" loop for the given SCEV. 
const Loop *getRelevantLoop(const SCEV *); @@ -244,6 +257,8 @@ namespace llvm { const Loop *L, Type *ExpandTy, Type *IntTy); + Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, bool useSubtract); }; } diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h index b6f0ae54cfa0..47b371029186 100644 --- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -491,7 +491,6 @@ namespace llvm { RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) { llvm_unreachable("Invalid use of SCEVCouldNotCompute!"); - return RetVal(); } }; } diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h index 68263300c726..f2f9db4ce4e8 100644 --- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h +++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h @@ -17,15 +17,15 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/DataTypes.h" -#include namespace llvm { - template class SmallVectorImpl; class Value; class Instruction; class APInt; class TargetData; - + class StringRef; + class MDNode; + /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne /// bit sets. This code only analyzes bits in Mask, in order to short-circuit @@ -36,10 +36,10 @@ namespace llvm { /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. - void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, - APInt &KnownOne, const TargetData *TD = 0, - unsigned Depth = 0); - + void ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const TargetData *TD = 0, unsigned Depth = 0); + void computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero); + /// ComputeSignBit - Determine whether the sign bit is known to be zero or /// one. Convenience wrapper around ComputeMaskedBits. void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, @@ -48,8 +48,10 @@ namespace llvm { /// isPowerOfTwo - Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer - /// type and vectors of integers. - bool isPowerOfTwo(Value *V, const TargetData *TD = 0, unsigned Depth = 0); + /// type and vectors of integers. If 'OrZero' is set then returns true if the + /// given value is either a power of two or zero. + bool isPowerOfTwo(Value *V, const TargetData *TD = 0, bool OrZero = false, + unsigned Depth = 0); /// isKnownNonZero - Return true if the given value is known to be non-zero /// when defined. For vectors return true if every element is known to be @@ -123,16 +125,15 @@ namespace llvm { return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset,TD); } - /// GetConstantStringInfo - This function computes the length of a + /// getConstantStringInfo - This function computes the length of a /// null-terminated C string pointed to by V. If successful, it returns true - /// and returns the string in Str. If unsuccessful, it returns false. If - /// StopAtNul is set to true (the default), the returned string is truncated - /// by a nul character in the global. 
If StopAtNul is false, the nul - /// character is included in the result string. - bool GetConstantStringInfo(const Value *V, std::string &Str, - uint64_t Offset = 0, - bool StopAtNul = true); - + /// and returns the string in Str. If unsuccessful, it returns false. This + /// does not include the trailing nul character by default. If TrimAtNul is + /// set to false, then this returns any trailing nul characters as well as any + /// other characters that come after it. + bool getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset = 0, bool TrimAtNul = true); + /// GetStringLength - If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. uint64_t GetStringLength(Value *V); @@ -154,6 +155,27 @@ namespace llvm { /// are lifetime markers. bool onlyUsedByLifetimeMarkers(const Value *V); + /// isSafeToSpeculativelyExecute - Return true if the instruction does not + /// have any effects besides calculating the result and does not have + /// undefined behavior. + /// + /// This method never returns true for an instruction that returns true for + /// mayHaveSideEffects; however, this method also does some other checks in + /// addition. It checks for undefined behavior, like dividing by zero or + /// loading from an invalid pointer (but not for undefined results, like a + /// shift with a shift amount larger than the width of the result). It checks + /// for malloc and alloca because speculatively executing them might cause a + /// memory leak. It also returns false for instructions related to control + /// flow, specifically terminators and PHI nodes. + /// + /// This method only looks at the instruction itself and its operands, so if + /// this method returns true, it is safe to move the instruction as long as + /// the correct dominance relationships for the operands and users hold. + /// However, this method can return true for instructions that read memory; + /// for such instructions, moving them may change the resulting value. + bool isSafeToSpeculativelyExecute(const Value *V, + const TargetData *TD = 0); + } // end namespace llvm #endif diff --git a/contrib/llvm/include/llvm/Argument.h b/contrib/llvm/include/llvm/Argument.h index cd7488266231..e66075c1f235 100644 --- a/contrib/llvm/include/llvm/Argument.h +++ b/contrib/llvm/include/llvm/Argument.h @@ -30,6 +30,7 @@ template /// the function was called with. /// @brief LLVM Argument representation class Argument : public Value, public ilist_node { + virtual void anchor(); Function *Parent; friend class SymbolTableListTraits; diff --git a/contrib/llvm/include/llvm/Assembly/AssemblyAnnotationWriter.h b/contrib/llvm/include/llvm/Assembly/AssemblyAnnotationWriter.h index 3a65f97a5b50..37b47c31e8c7 100644 --- a/contrib/llvm/include/llvm/Assembly/AssemblyAnnotationWriter.h +++ b/contrib/llvm/include/llvm/Assembly/AssemblyAnnotationWriter.h @@ -22,7 +22,7 @@ namespace llvm { class Function; class BasicBlock; class Instruction; -class raw_ostream; +class Value; class formatted_raw_ostream; class AssemblyAnnotationWriter { @@ -32,30 +32,30 @@ class AssemblyAnnotationWriter { /// emitFunctionAnnot - This may be implemented to emit a string right before /// the start of a function. 
- virtual void emitFunctionAnnot(const Function *F, - formatted_raw_ostream &OS) {} + virtual void emitFunctionAnnot(const Function *, + formatted_raw_ostream &) {} /// emitBasicBlockStartAnnot - This may be implemented to emit a string right /// after the basic block label, but before the first instruction in the /// block. - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { + virtual void emitBasicBlockStartAnnot(const BasicBlock *, + formatted_raw_ostream &) { } /// emitBasicBlockEndAnnot - This may be implemented to emit a string right /// after the basic block. - virtual void emitBasicBlockEndAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { + virtual void emitBasicBlockEndAnnot(const BasicBlock *, + formatted_raw_ostream &) { } /// emitInstructionAnnot - This may be implemented to emit a string right /// before an instruction is emitted. - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) {} + virtual void emitInstructionAnnot(const Instruction *, + formatted_raw_ostream &) {} /// printInfoComment - This may be implemented to emit a comment to the /// right of an instruction or global value. - virtual void printInfoComment(const Value &V, formatted_raw_ostream &OS) {} + virtual void printInfoComment(const Value &, formatted_raw_ostream &) {} }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Assembly/Parser.h b/contrib/llvm/include/llvm/Assembly/Parser.h index 82ec6d81367b..b971c531ae05 100644 --- a/contrib/llvm/include/llvm/Assembly/Parser.h +++ b/contrib/llvm/include/llvm/Assembly/Parser.h @@ -21,7 +21,6 @@ namespace llvm { class Module; class MemoryBuffer; class SMDiagnostic; -class raw_ostream; class LLVMContext; /// This function is the main interface to the LLVM Assembly Parser. It parses diff --git a/contrib/llvm/include/llvm/Assembly/Writer.h b/contrib/llvm/include/llvm/Assembly/Writer.h index 8d8befd472d8..6b89ae022da3 100644 --- a/contrib/llvm/include/llvm/Assembly/Writer.h +++ b/contrib/llvm/include/llvm/Assembly/Writer.h @@ -19,7 +19,6 @@ namespace llvm { -class Type; class Module; class Value; class raw_ostream; diff --git a/contrib/llvm/include/llvm/Attributes.h b/contrib/llvm/include/llvm/Attributes.h index 2d7b33b29bcf..0099f173b626 100644 --- a/contrib/llvm/include/llvm/Attributes.h +++ b/contrib/llvm/include/llvm/Attributes.h @@ -22,8 +22,66 @@ namespace llvm { class Type; +namespace Attribute { +/// We use this proxy POD type to allow constructing Attributes constants +/// using initializer lists. Do not use this class directly. +struct AttrConst { + uint64_t v; + AttrConst operator | (const AttrConst Attrs) const { + AttrConst Res = {v | Attrs.v}; + return Res; + } + AttrConst operator ~ () const { + AttrConst Res = {~v}; + return Res; + } +}; +} // namespace Attribute + + /// Attributes - A bitset of attributes. -typedef unsigned Attributes; +class Attributes { + public: + Attributes() : Bits(0) { } + explicit Attributes(uint64_t Val) : Bits(Val) { } + /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) { } + Attributes(const Attributes &Attrs) : Bits(Attrs.Bits) { } + // This is a "safe bool() operator". + operator const void *() const { return Bits ? 
this : 0; } + bool isEmptyOrSingleton() const { return (Bits & (Bits - 1)) == 0; } + Attributes &operator = (const Attributes &Attrs) { + Bits = Attrs.Bits; + return *this; + } + bool operator == (const Attributes &Attrs) const { + return Bits == Attrs.Bits; + } + bool operator != (const Attributes &Attrs) const { + return Bits != Attrs.Bits; + } + Attributes operator | (const Attributes &Attrs) const { + return Attributes(Bits | Attrs.Bits); + } + Attributes operator & (const Attributes &Attrs) const { + return Attributes(Bits & Attrs.Bits); + } + Attributes operator ^ (const Attributes &Attrs) const { + return Attributes(Bits ^ Attrs.Bits); + } + Attributes &operator |= (const Attributes &Attrs) { + Bits |= Attrs.Bits; + return *this; + } + Attributes &operator &= (const Attributes &Attrs) { + Bits &= Attrs.Bits; + return *this; + } + Attributes operator ~ () const { return Attributes(~Bits); } + uint64_t Raw() const { return Bits; } + private: + // Currently, we need less than 64 bits. + uint64_t Bits; +}; namespace Attribute { @@ -33,44 +91,55 @@ namespace Attribute { /// results or the function itself. /// @brief Function attributes. -const Attributes None = 0; ///< No attributes have been set -const Attributes ZExt = 1<<0; ///< Zero extended before/after call -const Attributes SExt = 1<<1; ///< Sign extended before/after call -const Attributes NoReturn = 1<<2; ///< Mark the function as not returning -const Attributes InReg = 1<<3; ///< Force argument to be passed in register -const Attributes StructRet = 1<<4; ///< Hidden pointer to structure to return -const Attributes NoUnwind = 1<<5; ///< Function doesn't unwind stack -const Attributes NoAlias = 1<<6; ///< Considered to not alias after call -const Attributes ByVal = 1<<7; ///< Pass structure by value -const Attributes Nest = 1<<8; ///< Nested function static chain -const Attributes ReadNone = 1<<9; ///< Function does not access memory -const Attributes ReadOnly = 1<<10; ///< Function only reads from memory -const Attributes NoInline = 1<<11; ///< inline=never -const Attributes AlwaysInline = 1<<12; ///< inline=always -const Attributes OptimizeForSize = 1<<13; ///< opt_size -const Attributes StackProtect = 1<<14; ///< Stack protection. -const Attributes StackProtectReq = 1<<15; ///< Stack protection required. -const Attributes Alignment = 31<<16; ///< Alignment of parameter (5 bits) +// We declare AttrConst objects that will be used throughout the code +// and also raw uint64_t objects with _i suffix to be used below for other +// constant declarations. This is done to avoid static CTORs and at the same +// time to keep type-safety of Attributes. 
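The comment above explains why the named attribute constants become AttrConst PODs rather than Attributes objects: they can still be folded at compile time, yet combine and convert implicitly into the type-safe wrapper. A minimal sketch of the resulting usage, assuming only the operators defined in the class above:

    // AttrConst | AttrConst is still a constant expression; the result
    // converts implicitly to the type-safe Attributes wrapper.
    llvm::Attributes A = llvm::Attribute::NoInline | llvm::Attribute::ReadOnly;

    // The "safe bool" conversion replaces the old "(A & X) != 0" comparisons.
    if (A & llvm::Attribute::ReadOnly) {
      // attribute present
    }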
+#define DECLARE_LLVM_ATTRIBUTE(name, value) \ + const uint64_t name##_i = value; \ + const AttrConst name = {value}; + +DECLARE_LLVM_ATTRIBUTE(None,0) ///< No attributes have been set +DECLARE_LLVM_ATTRIBUTE(ZExt,1<<0) ///< Zero extended before/after call +DECLARE_LLVM_ATTRIBUTE(SExt,1<<1) ///< Sign extended before/after call +DECLARE_LLVM_ATTRIBUTE(NoReturn,1<<2) ///< Mark the function as not returning +DECLARE_LLVM_ATTRIBUTE(InReg,1<<3) ///< Force argument to be passed in register +DECLARE_LLVM_ATTRIBUTE(StructRet,1<<4) ///< Hidden pointer to structure to return +DECLARE_LLVM_ATTRIBUTE(NoUnwind,1<<5) ///< Function doesn't unwind stack +DECLARE_LLVM_ATTRIBUTE(NoAlias,1<<6) ///< Considered to not alias after call +DECLARE_LLVM_ATTRIBUTE(ByVal,1<<7) ///< Pass structure by value +DECLARE_LLVM_ATTRIBUTE(Nest,1<<8) ///< Nested function static chain +DECLARE_LLVM_ATTRIBUTE(ReadNone,1<<9) ///< Function does not access memory +DECLARE_LLVM_ATTRIBUTE(ReadOnly,1<<10) ///< Function only reads from memory +DECLARE_LLVM_ATTRIBUTE(NoInline,1<<11) ///< inline=never +DECLARE_LLVM_ATTRIBUTE(AlwaysInline,1<<12) ///< inline=always +DECLARE_LLVM_ATTRIBUTE(OptimizeForSize,1<<13) ///< opt_size +DECLARE_LLVM_ATTRIBUTE(StackProtect,1<<14) ///< Stack protection. +DECLARE_LLVM_ATTRIBUTE(StackProtectReq,1<<15) ///< Stack protection required. +DECLARE_LLVM_ATTRIBUTE(Alignment,31<<16) ///< Alignment of parameter (5 bits) // stored as log2 of alignment with +1 bias // 0 means unaligned different from align 1 -const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer -const Attributes NoRedZone = 1<<22; /// disable redzone -const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point - /// instructions. -const Attributes Naked = 1<<24; ///< Naked function -const Attributes InlineHint = 1<<25; ///< source said inlining was - ///desirable -const Attributes StackAlignment = 7<<26; ///< Alignment of stack for - ///function (3 bits) stored as log2 - ///of alignment with +1 bias - ///0 means unaligned (different from - ///alignstack(1)) -const Attributes ReturnsTwice = 1<<29; ///< Function can return twice -const Attributes UWTable = 1<<30; ///< Function must be in a unwind - ///table -const Attributes NonLazyBind = 1U<<31; ///< Function is called early and/or - /// often, so lazy binding isn't - /// worthwhile. +DECLARE_LLVM_ATTRIBUTE(NoCapture,1<<21) ///< Function creates no aliases of pointer +DECLARE_LLVM_ATTRIBUTE(NoRedZone,1<<22) /// disable redzone +DECLARE_LLVM_ATTRIBUTE(NoImplicitFloat,1<<23) /// disable implicit floating point + /// instructions. +DECLARE_LLVM_ATTRIBUTE(Naked,1<<24) ///< Naked function +DECLARE_LLVM_ATTRIBUTE(InlineHint,1<<25) ///< source said inlining was + ///desirable +DECLARE_LLVM_ATTRIBUTE(StackAlignment,7<<26) ///< Alignment of stack for + ///function (3 bits) stored as log2 + ///of alignment with +1 bias + ///0 means unaligned (different from + ///alignstack= {1)) +DECLARE_LLVM_ATTRIBUTE(ReturnsTwice,1<<29) ///< Function can return twice +DECLARE_LLVM_ATTRIBUTE(UWTable,1<<30) ///< Function must be in a unwind + ///table +DECLARE_LLVM_ATTRIBUTE(NonLazyBind,1U<<31) ///< Function is called early and/or + /// often, so lazy binding isn't + /// worthwhile. +DECLARE_LLVM_ATTRIBUTE(AddressSafety,1ULL<<32) ///< Address safety checking is on. + +#undef DECLARE_LLVM_ATTRIBUTE /// Note that uwtable is about the ABI or the user mandating an entry in the /// unwind table. 
The nounwind attribute is about an exception passing by the @@ -85,24 +154,26 @@ const Attributes NonLazyBind = 1U<<31; ///< Function is called early and/or /// uwtable + nounwind = Needs an entry because the ABI says so. /// @brief Attributes that only apply to function parameters. -const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture; +const AttrConst ParameterOnly = {ByVal_i | Nest_i | + StructRet_i | NoCapture_i}; /// @brief Attributes that may be applied to the function itself. These cannot /// be used on return values or function parameters. -const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly | - NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq | - NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment | - UWTable | NonLazyBind | ReturnsTwice; +const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i | + ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i | + StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i | + Naked_i | InlineHint_i | StackAlignment_i | + UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i}; /// @brief Parameter attributes that do not apply to vararg call arguments. -const Attributes VarArgsIncompatible = StructRet; +const AttrConst VarArgsIncompatible = {StructRet_i}; /// @brief Attributes that are mutually incompatible. -const Attributes MutuallyIncompatible[4] = { - ByVal | InReg | Nest | StructRet, - ZExt | SExt, - ReadNone | ReadOnly, - NoInline | AlwaysInline +const AttrConst MutuallyIncompatible[4] = { + {ByVal_i | InReg_i | Nest_i | StructRet_i}, + {ZExt_i | SExt_i}, + {ReadNone_i | ReadOnly_i}, + {NoInline_i | AlwaysInline_i} }; /// @brief Which attributes cannot be applied to a type. @@ -113,20 +184,20 @@ Attributes typeIncompatible(Type *Ty); inline Attributes constructAlignmentFromInt(unsigned i) { // Default alignment, allow the target to define how to align it. if (i == 0) - return 0; + return None; assert(isPowerOf2_32(i) && "Alignment must be a power of two."); assert(i <= 0x40000000 && "Alignment too large."); - return (Log2_32(i)+1) << 16; + return Attributes((Log2_32(i)+1) << 16); } /// This returns the alignment field of an attribute as a byte alignment value. inline unsigned getAlignmentFromAttrs(Attributes A) { Attributes Align = A & Attribute::Alignment; - if (Align == 0) + if (!Align) return 0; - return 1U << ((Align >> 16) - 1); + return 1U << ((Align.Raw() >> 16) - 1); } /// This turns an int stack alignment (which must be a power of 2) into @@ -134,21 +205,21 @@ inline unsigned getAlignmentFromAttrs(Attributes A) { inline Attributes constructStackAlignmentFromInt(unsigned i) { // Default alignment, allow the target to define how to align it. if (i == 0) - return 0; + return None; assert(isPowerOf2_32(i) && "Alignment must be a power of two."); assert(i <= 0x100 && "Alignment too large."); - return (Log2_32(i)+1) << 26; + return Attributes((Log2_32(i)+1) << 26); } /// This returns the stack alignment field of an attribute as a byte alignment /// value. inline unsigned getStackAlignmentFromAttrs(Attributes A) { Attributes StackAlign = A & Attribute::StackAlignment; - if (StackAlign == 0) + if (!StackAlign) return 0; - return 1U << ((StackAlign >> 26) - 1); + return 1U << ((StackAlign.Raw() >> 26) - 1); } @@ -242,7 +313,7 @@ class AttrListPtr { /// paramHasAttr - Return true if the specified parameter index has the /// specified attribute set. 
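The alignment helpers above pack the parameter alignment into the 5-bit field at bits 16..20 as log2(alignment) plus a +1 bias, so a stored value of 0 means "no alignment specified" rather than align(1). A standalone sketch of that round trip, without the LLVM Log2_32 helper (the function names here are illustrative):

#include <cstdint>
#include <cassert>

static uint64_t encodeAlignment(unsigned Align) {   // Align must be a power of two
  if (Align == 0)
    return 0;                                       // 0 == "no alignment given"
  unsigned Log2 = 0;
  while ((1u << Log2) < Align)
    ++Log2;
  return uint64_t(Log2 + 1) << 16;                  // +1 bias distinguishes align(1) from "unset"
}

static unsigned decodeAlignment(uint64_t Bits) {
  uint64_t Field = (Bits >> 16) & 0x1F;             // 5-bit field at bits 16..20
  return Field == 0 ? 0 : 1u << (Field - 1);
}

int main() {
  assert(encodeAlignment(8) == (4u << 16));          // log2(8) + 1 == 4
  assert(decodeAlignment(encodeAlignment(8)) == 8);
  assert(decodeAlignment(0) == 0);                   // unaligned
  return 0;
}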
bool paramHasAttr(unsigned Idx, Attributes Attr) const { - return (getAttributes(Idx) & Attr) != 0; + return getAttributes(Idx) & Attr; } /// getParamAlignment - Return the alignment for the specified function diff --git a/contrib/llvm/include/llvm/AutoUpgrade.h b/contrib/llvm/include/llvm/AutoUpgrade.h index 8ca3548f533e..e13c4c12b0f4 100644 --- a/contrib/llvm/include/llvm/AutoUpgrade.h +++ b/contrib/llvm/include/llvm/AutoUpgrade.h @@ -39,14 +39,6 @@ namespace llvm { /// This checks for global variables which should be upgraded. It returns true /// if it requires upgrading. bool UpgradeGlobalVariable(GlobalVariable *GV); - - /// This function checks debug info intrinsics. If an intrinsic is invalid - /// then this function simply removes the intrinsic. - void CheckDebugInfoIntrinsics(Module *M); - - /// This function upgrades the old pre-3.0 exception handling system to the - /// new one. N.B. This will be removed in 3.1. - void UpgradeExceptionHandling(Module *M); } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/BasicBlock.h b/contrib/llvm/include/llvm/BasicBlock.h index 1cd8dc55ab58..d2aa1673d921 100644 --- a/contrib/llvm/include/llvm/BasicBlock.h +++ b/contrib/llvm/include/llvm/BasicBlock.h @@ -110,12 +110,6 @@ class BasicBlock : public Value, // Basic blocks are data objects also const Function *getParent() const { return Parent; } Function *getParent() { return Parent; } - /// use_back - Specialize the methods defined in Value, as we know that an - /// BasicBlock can only be used by Users (specifically terminators - /// and BlockAddress's). - User *use_back() { return cast(*use_begin());} - const User *use_back() const { return cast(*use_begin());} - /// getTerminator() - If this is a well formed basic block, then this returns /// a pointer to the terminator instruction. If it is not, then you get a /// null pointer back. @@ -274,6 +268,7 @@ class BasicBlock : public Value, // Basic blocks are data objects also /// getLandingPadInst() - Return the landingpad instruction associated with /// the landing pad. LandingPadInst *getLandingPadInst(); + const LandingPadInst *getLandingPadInst() const; private: /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress diff --git a/contrib/llvm/include/llvm/Bitcode/Archive.h b/contrib/llvm/include/llvm/Bitcode/Archive.h index f89a86cb0f77..86c44c7f150b 100644 --- a/contrib/llvm/include/llvm/Bitcode/Archive.h +++ b/contrib/llvm/include/llvm/Bitcode/Archive.h @@ -394,7 +394,7 @@ class Archive { /// @brief Look up multiple symbols in the archive. 
bool findModulesDefiningSymbols( std::set& symbols, ///< Symbols to be sought - std::set& modules, ///< The modules matching \p symbols + SmallVectorImpl& modules, ///< The modules matching \p symbols std::string* ErrMessage ///< Error msg storage, if non-zero ); diff --git a/contrib/llvm/include/llvm/Bitcode/BitCodes.h b/contrib/llvm/include/llvm/Bitcode/BitCodes.h index 449dc35d7de2..28e1ab1c8711 100644 --- a/contrib/llvm/include/llvm/Bitcode/BitCodes.h +++ b/contrib/llvm/include/llvm/Bitcode/BitCodes.h @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" #include namespace llvm { @@ -114,7 +115,6 @@ class BitCodeAbbrevOp { bool hasEncodingData() const { return hasEncodingData(getEncoding()); } static bool hasEncodingData(Encoding E) { switch (E) { - default: assert(0 && "Unknown encoding"); case Fixed: case VBR: return true; @@ -123,6 +123,7 @@ class BitCodeAbbrevOp { case Blob: return false; } + llvm_unreachable("Invalid encoding"); } /// isChar6 - Return true if this character is legal in the Char6 encoding. @@ -139,8 +140,7 @@ class BitCodeAbbrevOp { if (C >= '0' && C <= '9') return C-'0'+26+26; if (C == '.') return 62; if (C == '_') return 63; - assert(0 && "Not a value Char6 character!"); - return 0; + llvm_unreachable("Not a value Char6 character!"); } static char DecodeChar6(unsigned V) { @@ -150,17 +150,18 @@ class BitCodeAbbrevOp { if (V < 26+26+10) return V-26-26+'0'; if (V == 62) return '.'; if (V == 63) return '_'; - assert(0 && "Not a value Char6 character!"); - return ' '; + llvm_unreachable("Not a value Char6 character!"); } }; +template <> struct isPodLike { static const bool value=true; }; + /// BitCodeAbbrev - This class represents an abbreviation record. An /// abbreviation allows a complex record that has redundancy to be stored in a /// specialized format instead of the fully-general, fully-vbr, format. class BitCodeAbbrev { - SmallVector OperandList; + SmallVector OperandList; unsigned char RefCount; // Number of things using this. ~BitCodeAbbrev() {} public: diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h index 0437f53134dc..65868294403c 100644 --- a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h +++ b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h @@ -15,7 +15,10 @@ #ifndef BITSTREAM_READER_H #define BITSTREAM_READER_H +#include "llvm/ADT/OwningPtr.h" #include "llvm/Bitcode/BitCodes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/StreamableMemoryObject.h" #include #include #include @@ -36,9 +39,7 @@ class BitstreamReader { std::vector > RecordNames; }; private: - /// FirstChar/LastChar - This remembers the first and last bytes of the - /// stream. - const unsigned char *FirstChar, *LastChar; + OwningPtr BitcodeBytes; std::vector BlockInfoRecords; @@ -47,10 +48,10 @@ class BitstreamReader { /// uses this. 
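The Char6 helpers above map 'a'-'z' to 0..25, 'A'-'Z' to 26..51, '0'-'9' to 52..61, '.' to 62 and '_' to 63, so identifier-like strings can be stored in six bits per character in abbreviated records. A standalone round-trip sketch of that mapping (not the LLVM functions themselves):

#include <cassert>

static unsigned encodeChar6(char C) {
  if (C >= 'a' && C <= 'z') return C - 'a';
  if (C >= 'A' && C <= 'Z') return C - 'A' + 26;
  if (C >= '0' && C <= '9') return C - '0' + 26 + 26;
  if (C == '.') return 62;
  return 63;                                   // '_' (anything else is not Char6-encodable)
}

static char decodeChar6(unsigned V) {
  if (V < 26)           return char('a' + V);
  if (V < 26 + 26)      return char('A' + (V - 26));
  if (V < 26 + 26 + 10) return char('0' + (V - 26 - 26));
  return V == 62 ? '.' : '_';
}

int main() {
  for (const char *P = "llvm.used_0"; *P; ++P)
    assert(decodeChar6(encodeChar6(*P)) == *P);  // round-trips for Char6 text
  return 0;
}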
bool IgnoreBlockInfoNames; - BitstreamReader(const BitstreamReader&); // NOT IMPLEMENTED - void operator=(const BitstreamReader&); // NOT IMPLEMENTED + BitstreamReader(const BitstreamReader&); // DO NOT IMPLEMENT + void operator=(const BitstreamReader&); // DO NOT IMPLEMENT public: - BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) { + BitstreamReader() : IgnoreBlockInfoNames(true) { } BitstreamReader(const unsigned char *Start, const unsigned char *End) { @@ -58,12 +59,17 @@ class BitstreamReader { init(Start, End); } - void init(const unsigned char *Start, const unsigned char *End) { - FirstChar = Start; - LastChar = End; - assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); + BitstreamReader(StreamableMemoryObject *bytes) { + BitcodeBytes.reset(bytes); } + void init(const unsigned char *Start, const unsigned char *End) { + assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); + BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); + } + + StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } + ~BitstreamReader() { // Free the BlockInfoRecords. while (!BlockInfoRecords.empty()) { @@ -75,9 +81,6 @@ class BitstreamReader { BlockInfoRecords.pop_back(); } } - - const unsigned char *getFirstChar() const { return FirstChar; } - const unsigned char *getLastChar() const { return LastChar; } /// CollectBlockInfoNames - This is called by clients that want block/record /// name information. @@ -122,7 +125,7 @@ class BitstreamReader { class BitstreamCursor { friend class Deserializer; BitstreamReader *BitStream; - const unsigned char *NextChar; + size_t NextChar; /// CurWord - This is the current data we have pulled from the stream but have /// not returned to the client. @@ -156,8 +159,7 @@ class BitstreamCursor { } explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) { - NextChar = R.getFirstChar(); - assert(NextChar && "Bitstream not initialized yet"); + NextChar = 0; CurWord = 0; BitsInCurWord = 0; CurCodeSize = 2; @@ -167,8 +169,7 @@ class BitstreamCursor { freeState(); BitStream = &R; - NextChar = R.getFirstChar(); - assert(NextChar && "Bitstream not initialized yet"); + NextChar = 0; CurWord = 0; BitsInCurWord = 0; CurCodeSize = 2; @@ -225,13 +226,39 @@ class BitstreamCursor { /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #. unsigned GetAbbrevIDWidth() const { return CurCodeSize; } - bool AtEndOfStream() const { - return NextChar == BitStream->getLastChar() && BitsInCurWord == 0; + bool isEndPos(size_t pos) { + return BitStream->getBitcodeBytes().isObjectEnd(static_cast(pos)); + } + + bool canSkipToPos(size_t pos) const { + // pos can be skipped to if it is a valid address or one byte past the end. + return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( + static_cast(pos - 1)); + } + + unsigned char getByte(size_t pos) { + uint8_t byte = -1; + BitStream->getBitcodeBytes().readByte(pos, &byte); + return byte; + } + + uint32_t getWord(size_t pos) { + uint8_t buf[sizeof(uint32_t)]; + memset(buf, 0xFF, sizeof(buf)); + BitStream->getBitcodeBytes().readBytes(pos, + sizeof(buf), + buf, + NULL); + return *reinterpret_cast(buf); + } + + bool AtEndOfStream() { + return isEndPos(NextChar) && BitsInCurWord == 0; } /// GetCurrentBitNo - Return the bit # of the bit we are reading. 
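With this change the cursor addresses the stream by byte position over a StreamableMemoryObject instead of holding raw begin/end pointers, but getWord() still assembles a 32-bit word from four stream bytes in little-endian order. A standalone sketch of that assembly over a plain buffer (the buffer contents are illustrative):

#include <cstdint>
#include <cstddef>
#include <cassert>

static uint32_t readWordLE(const unsigned char *Buf, size_t Pos) {
  return uint32_t(Buf[Pos + 0])       |
         uint32_t(Buf[Pos + 1]) << 8  |
         uint32_t(Buf[Pos + 2]) << 16 |
         uint32_t(Buf[Pos + 3]) << 24;
}

int main() {
  const unsigned char Stream[] = { 0x42, 0x43, 0xC0, 0xDE };  // 'B', 'C', 0xC0, 0xDE
  assert(readWordLE(Stream, 0) == 0xDEC04342u);               // bitcode magic as a little-endian word
  return 0;
}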
uint64_t GetCurrentBitNo() const { - return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord; + return NextChar*CHAR_BIT - BitsInCurWord; } BitstreamReader *getBitStreamReader() { @@ -246,12 +273,10 @@ class BitstreamCursor { void JumpToBit(uint64_t BitNo) { uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3; uintptr_t WordBitNo = uintptr_t(BitNo) & 31; - assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()- - BitStream->getFirstChar()) && - "Invalid location"); + assert(canSkipToPos(ByteNo) && "Invalid location"); // Move the cursor to the right word. - NextChar = BitStream->getFirstChar()+ByteNo; + NextChar = ByteNo; BitsInCurWord = 0; CurWord = 0; @@ -272,7 +297,7 @@ class BitstreamCursor { } // If we run out of data, stop at the end of the stream. - if (NextChar == BitStream->getLastChar()) { + if (isEndPos(NextChar)) { CurWord = 0; BitsInCurWord = 0; return 0; @@ -281,8 +306,7 @@ class BitstreamCursor { unsigned R = CurWord; // Read the next word from the stream. - CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) | - (NextChar[2] << 16) | (NextChar[3] << 24); + CurWord = getWord(NextChar); NextChar += 4; // Extract NumBits-BitsInCurWord from what we just read. @@ -376,9 +400,8 @@ class BitstreamCursor { // Check that the block wasn't partially defined, and that the offset isn't // bogus. - const unsigned char *const SkipTo = NextChar + NumWords*4; - if (AtEndOfStream() || SkipTo > BitStream->getLastChar() || - SkipTo < BitStream->getFirstChar()) + size_t SkipTo = NextChar + NumWords*4; + if (AtEndOfStream() || !canSkipToPos(SkipTo)) return true; NextChar = SkipTo; @@ -409,8 +432,7 @@ class BitstreamCursor { if (NumWordsP) *NumWordsP = NumWords; // Validate that this block is sane. - if (CurCodeSize == 0 || AtEndOfStream() || - NextChar+NumWords*4 > BitStream->getLastChar()) + if (CurCodeSize == 0 || AtEndOfStream()) return true; return false; @@ -455,10 +477,10 @@ class BitstreamCursor { void ReadAbbreviatedField(const BitCodeAbbrevOp &Op, SmallVectorImpl &Vals) { assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); - + // Decode the value as we are commanded. switch (Op.getEncoding()) { - default: assert(0 && "Unknown encoding!"); + default: llvm_unreachable("Unknown encoding!"); case BitCodeAbbrevOp::Fixed: Vals.push_back(Read((unsigned)Op.getEncodingData())); break; @@ -512,24 +534,25 @@ class BitstreamCursor { SkipToWord(); // 32-bit alignment // Figure out where the end of this blob will be including tail padding. - const unsigned char *NewEnd = NextChar+((NumElts+3)&~3); + size_t NewEnd = NextChar+((NumElts+3)&~3); // If this would read off the end of the bitcode file, just set the // record to empty and return. - if (NewEnd > BitStream->getLastChar()) { + if (!canSkipToPos(NewEnd)) { Vals.append(NumElts, 0); - NextChar = BitStream->getLastChar(); + NextChar = BitStream->getBitcodeBytes().getExtent(); break; } // Otherwise, read the number of bytes. If we can return a reference to // the data, do so to avoid copying it. if (BlobStart) { - *BlobStart = (const char*)NextChar; + *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer( + NextChar, NumElts); *BlobLen = NumElts; } else { for (; NumElts; ++NextChar, --NumElts) - Vals.push_back(*NextChar); + Vals.push_back(getByte(NextChar)); } // Skip over tail padding. 
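Two pieces of alignment arithmetic recur in the reader above: blob payloads are padded out to a 32-bit boundary with (NumElts + 3) & ~3, and JumpToBit() splits an absolute bit position into a word-aligned byte offset ((BitNo/8) & ~3) plus a bit offset within that word (BitNo & 31). A small standalone sketch with made-up values:

#include <cstdint>
#include <cassert>

static uint64_t roundUpToWord(uint64_t N) { return (N + 3) & ~uint64_t(3); }

int main() {
  assert(roundUpToWord(5) == 8);                 // a 5-byte blob occupies 8 bytes with tail padding
  assert(roundUpToWord(8) == 8);                 // already aligned, no padding

  uint64_t BitNo = 70;                           // some absolute bit position
  uint64_t ByteNo = (BitNo / 8) & ~uint64_t(3);  // aligned word containing it: byte 8
  uint64_t WordBitNo = BitNo & 31;               // bit offset inside that word: bit 6
  assert(ByteNo == 8 && WordBitNo == 6);
  return 0;
}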
NextChar = NewEnd; diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h index bfb3a4e49c51..475da133f8a8 100644 --- a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h +++ b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h @@ -16,13 +16,14 @@ #define BITSTREAM_WRITER_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Bitcode/BitCodes.h" #include namespace llvm { class BitstreamWriter { - std::vector &Out; + SmallVectorImpl &Out; /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. unsigned CurBit; @@ -59,8 +60,40 @@ class BitstreamWriter { }; std::vector BlockInfoRecords; + // BackpatchWord - Backpatch a 32-bit word in the output with the specified + // value. + void BackpatchWord(unsigned ByteNo, unsigned NewWord) { + Out[ByteNo++] = (unsigned char)(NewWord >> 0); + Out[ByteNo++] = (unsigned char)(NewWord >> 8); + Out[ByteNo++] = (unsigned char)(NewWord >> 16); + Out[ByteNo ] = (unsigned char)(NewWord >> 24); + } + + void WriteByte(unsigned char Value) { + Out.push_back(Value); + } + + void WriteWord(unsigned Value) { + unsigned char Bytes[4] = { + (unsigned char)(Value >> 0), + (unsigned char)(Value >> 8), + (unsigned char)(Value >> 16), + (unsigned char)(Value >> 24) }; + Out.append(&Bytes[0], &Bytes[4]); + } + + unsigned GetBufferOffset() const { + return Out.size(); + } + + unsigned GetWordIndex() const { + unsigned Offset = GetBufferOffset(); + assert((Offset & 3) == 0 && "Not 32-bit aligned"); + return Offset / 4; + } + public: - explicit BitstreamWriter(std::vector &O) + explicit BitstreamWriter(SmallVectorImpl &O) : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} ~BitstreamWriter() { @@ -78,10 +111,8 @@ class BitstreamWriter { } } - std::vector &getBuffer() { return Out; } - /// \brief Retrieve the current position in the stream, in bits. - uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; } + uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } //===--------------------------------------------------------------------===// // Basic Primitives for emitting bits to the stream. @@ -97,11 +128,7 @@ class BitstreamWriter { } // Add the current word. - unsigned V = CurValue; - Out.push_back((unsigned char)(V >> 0)); - Out.push_back((unsigned char)(V >> 8)); - Out.push_back((unsigned char)(V >> 16)); - Out.push_back((unsigned char)(V >> 24)); + WriteWord(CurValue); if (CurBit) CurValue = Val >> (32-CurBit); @@ -121,11 +148,7 @@ class BitstreamWriter { void FlushToWord() { if (CurBit) { - unsigned V = CurValue; - Out.push_back((unsigned char)(V >> 0)); - Out.push_back((unsigned char)(V >> 8)); - Out.push_back((unsigned char)(V >> 16)); - Out.push_back((unsigned char)(V >> 24)); + WriteWord(CurValue); CurBit = 0; CurValue = 0; } @@ -164,15 +187,6 @@ class BitstreamWriter { Emit(Val, CurCodeSize); } - // BackpatchWord - Backpatch a 32-bit word in the output with the specified - // value. 
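The writer's block handling above emits a placeholder word when a sub-block is entered and patches it with the real size when the block is popped; BackpatchWord() just rewrites four already-emitted bytes in little-endian order. A standalone sketch of that "emit placeholder, backpatch on exit" pattern, with std::vector standing in for the writer's buffer:

#include <cstdint>
#include <vector>
#include <cassert>

static void writeWordLE(std::vector<unsigned char> &Out, uint32_t V) {
  Out.push_back((unsigned char)(V >> 0));  Out.push_back((unsigned char)(V >> 8));
  Out.push_back((unsigned char)(V >> 16)); Out.push_back((unsigned char)(V >> 24));
}

static void backpatchWordLE(std::vector<unsigned char> &Out, size_t ByteNo, uint32_t V) {
  Out[ByteNo + 0] = (unsigned char)(V >> 0);  Out[ByteNo + 1] = (unsigned char)(V >> 8);
  Out[ByteNo + 2] = (unsigned char)(V >> 16); Out[ByteNo + 3] = (unsigned char)(V >> 24);
}

int main() {
  std::vector<unsigned char> Out;
  size_t SizeFieldByte = Out.size();
  writeWordLE(Out, 0);                 // placeholder for the block length
  writeWordLE(Out, 0x11111111);        // block payload: two words
  writeWordLE(Out, 0x22222222);
  uint32_t SizeInWords = (uint32_t)((Out.size() - SizeFieldByte) / 4 - 1);  // exclude the size field
  backpatchWordLE(Out, SizeFieldByte, SizeInWords);
  assert(Out[0] == 2);                 // patched length: 2 words
  return 0;
}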
- void BackpatchWord(unsigned ByteNo, unsigned NewWord) { - Out[ByteNo++] = (unsigned char)(NewWord >> 0); - Out[ByteNo++] = (unsigned char)(NewWord >> 8); - Out[ByteNo++] = (unsigned char)(NewWord >> 16); - Out[ByteNo ] = (unsigned char)(NewWord >> 24); - } - //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// @@ -199,7 +213,7 @@ class BitstreamWriter { EmitVBR(CodeLen, bitc::CodeLenWidth); FlushToWord(); - unsigned BlockSizeWordLoc = static_cast(Out.size()); + unsigned BlockSizeWordIndex = GetWordIndex(); unsigned OldCodeSize = CurCodeSize; // Emit a placeholder, which will be replaced when the block is popped. @@ -209,7 +223,7 @@ class BitstreamWriter { // Push the outer block's abbrev set onto the stack, start out with an // empty abbrev set. - BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4)); + BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex)); BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); // If there is a blockinfo for this BlockID, add all the predefined abbrevs @@ -239,7 +253,7 @@ class BitstreamWriter { FlushToWord(); // Compute the size of the block, in words, not counting the size field. - unsigned SizeInWords= static_cast(Out.size())/4-B.StartSizeWord-1; + unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; unsigned ByteNo = B.StartSizeWord*4; // Update the block size field in the header of this sub-block. @@ -275,7 +289,7 @@ class BitstreamWriter { // Encode the value as we are commanded. switch (Op.getEncoding()) { - default: assert(0 && "Unknown encoding!"); + default: llvm_unreachable("Unknown encoding!"); case BitCodeAbbrevOp::Fixed: if (Op.getEncodingData()) Emit((unsigned)V, (unsigned)Op.getEncodingData()); @@ -355,25 +369,24 @@ class BitstreamWriter { // Flush to a 32-bit alignment boundary. FlushToWord(); - assert((Out.size() & 3) == 0 && "Not 32-bit aligned"); // Emit each field as a literal byte. if (BlobData) { for (unsigned i = 0; i != BlobLen; ++i) - Out.push_back((unsigned char)BlobData[i]); + WriteByte((unsigned char)BlobData[i]); // Know that blob data is consumed for assertion below. BlobData = 0; } else { for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob"); - Out.push_back((unsigned char)Vals[RecordIdx]); + WriteByte((unsigned char)Vals[RecordIdx]); } } + // Align end to 32-bits. - while (Out.size() & 3) - Out.push_back(0); - + while (GetBufferOffset() & 3) + WriteByte(0); } else { // Single scalar field. assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); EmitAbbreviatedField(Op, Vals[RecordIdx]); @@ -488,7 +501,7 @@ class BitstreamWriter { /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. void EnterBlockInfoBlock(unsigned CodeWidth) { EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth); - BlockInfoCurBID = -1U; + BlockInfoCurBID = ~0U; } private: /// SwitchToBlockID - If we aren't already talking about the specified block diff --git a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 4b0dcc36232f..a8c34cb82995 100644 --- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -29,23 +29,21 @@ namespace bitc { // Module sub-block id's. PARAMATTR_BLOCK_ID, - - /// TYPE_BLOCK_ID_OLD - This is the type descriptor block in LLVM 2.9 and - /// earlier, replaced with TYPE_BLOCK_ID2. 
FIXME: Remove in LLVM 3.1. - TYPE_BLOCK_ID_OLD, + + UNUSED_ID1, CONSTANTS_BLOCK_ID, FUNCTION_BLOCK_ID, - /// TYPE_SYMTAB_BLOCK_ID_OLD - This type descriptor is from LLVM 2.9 and - /// earlier bitcode files. FIXME: Remove in LLVM 3.1 - TYPE_SYMTAB_BLOCK_ID_OLD, + UNUSED_ID2, VALUE_SYMTAB_BLOCK_ID, METADATA_BLOCK_ID, METADATA_ATTACHMENT_ID, - TYPE_BLOCK_ID_NEW + TYPE_BLOCK_ID_NEW, + + USELIST_BLOCK_ID }; @@ -63,10 +61,10 @@ namespace bitc { MODULE_CODE_GLOBALVAR = 7, // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, - // section, visibility] + // section, visibility, gc, unnamed_addr] MODULE_CODE_FUNCTION = 8, - // ALIAS: [alias type, aliasee val#, linkage] + // ALIAS: [alias type, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS = 9, /// MODULE_CODE_PURGEVALS: [numvals] @@ -92,11 +90,12 @@ namespace bitc { TYPE_CODE_OPAQUE = 6, // OPAQUE TYPE_CODE_INTEGER = 7, // INTEGER: [width] TYPE_CODE_POINTER = 8, // POINTER: [pointee type] - TYPE_CODE_FUNCTION = 9, // FUNCTION: [vararg, retty, paramty x N] + + TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, + // paramty x N] + + TYPE_CODE_HALF = 10, // HALF - // FIXME: This is the encoding used for structs in LLVM 2.9 and earlier. - // REMOVE this in LLVM 3.1 - TYPE_CODE_STRUCT_OLD = 10, // STRUCT: [ispacked, eltty x N] TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] @@ -113,7 +112,9 @@ namespace bitc { TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N] TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N] - TYPE_CODE_STRUCT_NAMED = 20 // STRUCT_NAMED: [ispacked, eltty x N] + TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N] + + TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N] }; // The type symbol table only has one code (TST_ENTRY_CODE). @@ -163,7 +164,8 @@ namespace bitc { CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr] CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP: [n x operands] - CST_CODE_BLOCKADDRESS = 21 // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] + CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] + CST_CODE_DATA = 22 // DATA: [n x elements] }; /// CastOpcodes - These are values used in the bitcode files to encode which @@ -270,7 +272,7 @@ namespace bitc { FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...] - FUNC_CODE_INST_UNWIND = 14, // UNWIND + // 14 is unused. FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] @@ -314,6 +316,10 @@ namespace bitc { FUNC_CODE_INST_STOREATOMIC = 42 // STORE: [ptrty,ptr,val, align, vol // ordering, synchscope] }; + + enum UseListCodes { + USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD. 
+ }; } // End bitc namespace } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h b/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h index fa754c014621..cc2b473f2c57 100644 --- a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h +++ b/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h @@ -17,35 +17,45 @@ #include namespace llvm { - class Module; - class MemoryBuffer; - class ModulePass; class BitstreamWriter; + class MemoryBuffer; + class DataStreamer; class LLVMContext; + class Module; + class ModulePass; class raw_ostream; - + /// getLazyBitcodeModule - Read the header of the specified bitcode buffer /// and prepare for lazy deserialization of function bodies. If successful, /// this takes ownership of 'buffer' and returns a non-null pointer. On /// error, this returns null, *does not* take ownership of Buffer, and fills /// in *ErrMsg with an error description if ErrMsg is non-null. Module *getLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext& Context, + LLVMContext &Context, std::string *ErrMsg = 0); + /// getStreamedBitcodeModule - Read the header of the specified stream + /// and prepare for lazy deserialization and streaming of function bodies. + /// On error, this returns null, and fills in *ErrMsg with an error + /// description if ErrMsg is non-null. + Module *getStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg = 0); + /// getBitcodeTargetTriple - Read the header of the specified bitcode /// buffer and extract just the triple information. If successful, /// this returns a string and *does not* take ownership /// of 'buffer'. On error, this returns "", and fills in *ErrMsg /// if ErrMsg is non-null. std::string getBitcodeTargetTriple(MemoryBuffer *Buffer, - LLVMContext& Context, + LLVMContext &Context, std::string *ErrMsg = 0); /// ParseBitcodeFile - Read the specified bitcode file, returning the module. /// If an error occurs, this returns null and fills in *ErrMsg if it is /// non-null. This method *never* takes ownership of Buffer. - Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, + Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context, std::string *ErrMsg = 0); /// WriteBitcodeToFile - Write the specified module to the specified @@ -53,15 +63,11 @@ namespace llvm { /// should be in "binary" mode. void WriteBitcodeToFile(const Module *M, raw_ostream &Out); - /// WriteBitcodeToStream - Write the specified module to the specified - /// raw output stream. - void WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream); - /// createBitcodeWriterPass - Create and return a pass that writes the module /// to the specified ostream. ModulePass *createBitcodeWriterPass(raw_ostream &Str); - - + + /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. /// @@ -109,21 +115,24 @@ namespace llvm { /// uint32_t BitcodeSize; // Size of traditional bitcode file. /// ... potentially other gunk ... /// }; - /// + /// /// This function is called when we find a file with a matching magic number. /// In this case, skip down to the subsection of the file that is actually a /// BC file. - static inline bool SkipBitcodeWrapperHeader(unsigned char *&BufPtr, - unsigned char *&BufEnd) { + /// If 'VerifyBufferSize' is true, check that the buffer is large enough to + /// contain the whole bitcode file. 
+ static inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, + const unsigned char *&BufEnd, + bool VerifyBufferSize) { enum { KnownHeaderSize = 4*4, // Size of header we read. OffsetField = 2*4, // Offset in bytes to Offset field. SizeField = 3*4 // Offset in bytes to Size field. }; - + // Must contain the header! if (BufEnd-BufPtr < KnownHeaderSize) return true; - + unsigned Offset = ( BufPtr[OffsetField ] | (BufPtr[OffsetField+1] << 8) | (BufPtr[OffsetField+2] << 16) | @@ -132,9 +141,9 @@ namespace llvm { (BufPtr[SizeField +1] << 8) | (BufPtr[SizeField +2] << 16) | (BufPtr[SizeField +3] << 24)); - + // Verify that Offset+Size fits in the file. - if (Offset+Size > unsigned(BufEnd-BufPtr)) + if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr)) return true; BufPtr += Offset; BufEnd = BufPtr+Size; diff --git a/contrib/llvm/include/llvm/CodeGen/Analysis.h b/contrib/llvm/include/llvm/CodeGen/Analysis.h index d8e64071a1d9..0b609ed6586e 100644 --- a/contrib/llvm/include/llvm/CodeGen/Analysis.h +++ b/contrib/llvm/include/llvm/CodeGen/Analysis.h @@ -27,6 +27,7 @@ namespace llvm { class GlobalVariable; class TargetLowering; class SDNode; +class SDValue; class SelectionDAG; /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence @@ -70,6 +71,10 @@ bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, /// ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred); +/// getFCmpCodeWithoutNaN - Given an ISD condition code comparing floats, +/// return the equivalent code if we're allowed to assume that NaNs won't occur. +ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC); + /// getICmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR integer condition code. /// @@ -85,7 +90,7 @@ bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, const TargetLowering &TLI); bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - const TargetLowering &TLI); + SDValue &Chain, const TargetLowering &TLI); } // End llvm namespace diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h index 06c5c83c95ec..56a87f139a21 100644 --- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -18,16 +18,12 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { class BlockAddress; class GCStrategy; class Constant; - class ConstantArray; - class ConstantFP; - class ConstantInt; - class ConstantStruct; - class ConstantVector; class GCMetadataPrinter; class GlobalValue; class GlobalVariable; @@ -37,14 +33,11 @@ namespace llvm { class MachineLocation; class MachineLoopInfo; class MachineLoop; - class MachineConstantPool; - class MachineConstantPoolEntry; class MachineConstantPoolValue; class MachineJumpTableInfo; class MachineModuleInfo; class MachineMove; class MCAsmInfo; - class MCInst; class MCContext; class MCSection; class MCStreamer; @@ -56,8 +49,6 @@ namespace llvm { class TargetLoweringObjectFile; class TargetData; class TargetMachine; - class Twine; - class Type; /// AsmPrinter - This class is intended to be used as a driving class for all /// asm writers. @@ -97,6 +88,11 @@ namespace llvm { /// MCSymbol *CurrentFnSym; + /// The symbol used to represent the start of the current function for the + /// purpose of calculating its size (e.g. using the .size directive). By + /// default, this is equal to CurrentFnSym. 
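Per the code above, the bitcode wrapper header is a sequence of little-endian 32-bit fields, with the Offset and Size fields at byte offsets 2*4 and 3*4; the reader reassembles them to find the embedded bitcode. A standalone sketch of that field extraction (the header bytes below are made up for illustration):

#include <cstdint>
#include <cassert>

static uint32_t fieldLE(const unsigned char *Buf, unsigned Off) {
  return uint32_t(Buf[Off])           | uint32_t(Buf[Off + 1]) << 8 |
         uint32_t(Buf[Off + 2]) << 16 | uint32_t(Buf[Off + 3]) << 24;
}

int main() {
  unsigned char Header[16] = { 0 };
  Header[8]  = 0x10;                        // Offset field = 16: bitcode starts right after the header
  Header[12] = 0x80; Header[13] = 0x02;     // Size field = 0x280 bytes of bitcode
  unsigned Offset = fieldLE(Header, 2 * 4); // OffsetField
  unsigned Size   = fieldLE(Header, 3 * 4); // SizeField
  assert(Offset == 16 && Size == 0x280);
  return 0;
}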
+ MCSymbol *CurrentFnSymForSize; + private: // GCMetadataPrinters - The garbage collection metadata printer table. void *GCMetadataPrinters; // Really a DenseMap. @@ -194,6 +190,11 @@ namespace llvm { bool needsSEHMoves(); + /// needsRelocationsForDwarfStringPool - Specifies whether the object format + /// expects to use relocations to refer to debug entries. Alternatively we + /// emit section offsets in bytes from the start of the string pool. + bool needsRelocationsForDwarfStringPool() const; + /// EmitConstantPool - Print to the current output stream assembly /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by @@ -256,13 +257,20 @@ namespace llvm { /// EmitInstruction - Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { - assert(0 && "EmitInstruction not implemented"); + llvm_unreachable("EmitInstruction not implemented"); } virtual void EmitFunctionEntryLabel(); virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); + /// EmitXXStructor - Targets can override this to change how global + /// constants that are part of a C++ static/global constructor list are + /// emitted. + virtual void EmitXXStructor(const Constant *CV) { + EmitGlobalConstant(CV); + } + /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. @@ -466,7 +474,7 @@ namespace llvm { const MachineBasicBlock *MBB, unsigned uid) const; void EmitLLVMUsedList(const Constant *List); - void EmitXXStructorList(const Constant *List); + void EmitXXStructorList(const Constant *List, bool isCtor); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C); }; } diff --git a/contrib/llvm/include/llvm/CodeGen/BinaryObject.h b/contrib/llvm/include/llvm/CodeGen/BinaryObject.h deleted file mode 100644 index 8c1431ffbeed..000000000000 --- a/contrib/llvm/include/llvm/CodeGen/BinaryObject.h +++ /dev/null @@ -1,353 +0,0 @@ -//===-- llvm/CodeGen/BinaryObject.h - Binary Object. -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a Binary Object Aka. "blob" for holding data from code -// generators, ready for data to the object module code writters. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_BINARYOBJECT_H -#define LLVM_CODEGEN_BINARYOBJECT_H - -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Support/DataTypes.h" - -#include -#include - -namespace llvm { - -typedef std::vector BinaryData; - -class BinaryObject { -protected: - std::string Name; - bool IsLittleEndian; - bool Is64Bit; - BinaryData Data; - std::vector Relocations; - -public: - /// Constructors and destructor - BinaryObject() {} - - BinaryObject(bool isLittleEndian, bool is64Bit) - : IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {} - - BinaryObject(const std::string &name, bool isLittleEndian, bool is64Bit) - : Name(name), IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {} - - ~BinaryObject() {} - - /// getName - get name of BinaryObject - inline std::string getName() const { return Name; } - - /// get size of binary data - size_t size() const { - return Data.size(); - } - - /// get binary data - BinaryData& getData() { - return Data; - } - - /// get machine relocations - const std::vector& getRelocations() const { - return Relocations; - } - - /// hasRelocations - Return true if 'Relocations' is not empty - bool hasRelocations() const { - return !Relocations.empty(); - } - - /// emitZeros - This callback is invoked to emit a arbitrary number - /// of zero bytes to the data stream. - inline void emitZeros(unsigned Size) { - for (unsigned i=0; i < Size; ++i) - emitByte(0); - } - - /// emitByte - This callback is invoked when a byte needs to be - /// written to the data stream. - inline void emitByte(uint8_t B) { - Data.push_back(B); - } - - /// emitWord16 - This callback is invoked when a 16-bit word needs to be - /// written to the data stream in correct endian format and correct size. - inline void emitWord16(uint16_t W) { - if (IsLittleEndian) - emitWord16LE(W); - else - emitWord16BE(W); - } - - /// emitWord16LE - This callback is invoked when a 16-bit word needs to be - /// written to the data stream in correct endian format and correct size. - inline void emitWord16LE(uint16_t W) { - Data.push_back((uint8_t)(W >> 0)); - Data.push_back((uint8_t)(W >> 8)); - } - - /// emitWord16BE - This callback is invoked when a 16-bit word needs to be - /// written to the data stream in correct endian format and correct size. - inline void emitWord16BE(uint16_t W) { - Data.push_back((uint8_t)(W >> 8)); - Data.push_back((uint8_t)(W >> 0)); - } - - /// emitWord - This callback is invoked when a word needs to be - /// written to the data stream in correct endian format and correct size. - inline void emitWord(uint64_t W) { - if (!Is64Bit) - emitWord32(W); - else - emitWord64(W); - } - - /// emitWord32 - This callback is invoked when a 32-bit word needs to be - /// written to the data stream in correct endian format. - inline void emitWord32(uint32_t W) { - if (IsLittleEndian) - emitWordLE(W); - else - emitWordBE(W); - } - - /// emitWord64 - This callback is invoked when a 32-bit word needs to be - /// written to the data stream in correct endian format. - inline void emitWord64(uint64_t W) { - if (IsLittleEndian) - emitDWordLE(W); - else - emitDWordBE(W); - } - - /// emitWord64 - This callback is invoked when a x86_fp80 needs to be - /// written to the data stream in correct endian format. 
- inline void emitWordFP80(const uint64_t *W, unsigned PadSize) { - if (IsLittleEndian) { - emitWord64(W[0]); - emitWord16(W[1]); - } else { - emitWord16(W[1]); - emitWord64(W[0]); - } - emitZeros(PadSize); - } - - /// emitWordLE - This callback is invoked when a 32-bit word needs to be - /// written to the data stream in little-endian format. - inline void emitWordLE(uint32_t W) { - Data.push_back((uint8_t)(W >> 0)); - Data.push_back((uint8_t)(W >> 8)); - Data.push_back((uint8_t)(W >> 16)); - Data.push_back((uint8_t)(W >> 24)); - } - - /// emitWordBE - This callback is invoked when a 32-bit word needs to be - /// written to the data stream in big-endian format. - /// - inline void emitWordBE(uint32_t W) { - Data.push_back((uint8_t)(W >> 24)); - Data.push_back((uint8_t)(W >> 16)); - Data.push_back((uint8_t)(W >> 8)); - Data.push_back((uint8_t)(W >> 0)); - } - - /// emitDWordLE - This callback is invoked when a 64-bit word needs to be - /// written to the data stream in little-endian format. - inline void emitDWordLE(uint64_t W) { - Data.push_back((uint8_t)(W >> 0)); - Data.push_back((uint8_t)(W >> 8)); - Data.push_back((uint8_t)(W >> 16)); - Data.push_back((uint8_t)(W >> 24)); - Data.push_back((uint8_t)(W >> 32)); - Data.push_back((uint8_t)(W >> 40)); - Data.push_back((uint8_t)(W >> 48)); - Data.push_back((uint8_t)(W >> 56)); - } - - /// emitDWordBE - This callback is invoked when a 64-bit word needs to be - /// written to the data stream in big-endian format. - inline void emitDWordBE(uint64_t W) { - Data.push_back((uint8_t)(W >> 56)); - Data.push_back((uint8_t)(W >> 48)); - Data.push_back((uint8_t)(W >> 40)); - Data.push_back((uint8_t)(W >> 32)); - Data.push_back((uint8_t)(W >> 24)); - Data.push_back((uint8_t)(W >> 16)); - Data.push_back((uint8_t)(W >> 8)); - Data.push_back((uint8_t)(W >> 0)); - } - - /// fixByte - This callback is invoked when a byte needs to be - /// fixup the buffer. - inline void fixByte(uint8_t B, uint32_t offset) { - Data[offset] = B; - } - - /// fixWord16 - This callback is invoked when a 16-bit word needs to - /// fixup the data stream in correct endian format. - inline void fixWord16(uint16_t W, uint32_t offset) { - if (IsLittleEndian) - fixWord16LE(W, offset); - else - fixWord16BE(W, offset); - } - - /// emitWord16LE - This callback is invoked when a 16-bit word needs to - /// fixup the data stream in little endian format. - inline void fixWord16LE(uint16_t W, uint32_t offset) { - Data[offset] = (uint8_t)(W >> 0); - Data[++offset] = (uint8_t)(W >> 8); - } - - /// fixWord16BE - This callback is invoked when a 16-bit word needs to - /// fixup data stream in big endian format. - inline void fixWord16BE(uint16_t W, uint32_t offset) { - Data[offset] = (uint8_t)(W >> 8); - Data[++offset] = (uint8_t)(W >> 0); - } - - /// emitWord - This callback is invoked when a word needs to - /// fixup the data in correct endian format and correct size. - inline void fixWord(uint64_t W, uint32_t offset) { - if (!Is64Bit) - fixWord32(W, offset); - else - fixWord64(W, offset); - } - - /// fixWord32 - This callback is invoked when a 32-bit word needs to - /// fixup the data in correct endian format. - inline void fixWord32(uint32_t W, uint32_t offset) { - if (IsLittleEndian) - fixWord32LE(W, offset); - else - fixWord32BE(W, offset); - } - - /// fixWord32LE - This callback is invoked when a 32-bit word needs to - /// fixup the data in little endian format. 
- inline void fixWord32LE(uint32_t W, uint32_t offset) { - Data[offset] = (uint8_t)(W >> 0); - Data[++offset] = (uint8_t)(W >> 8); - Data[++offset] = (uint8_t)(W >> 16); - Data[++offset] = (uint8_t)(W >> 24); - } - - /// fixWord32BE - This callback is invoked when a 32-bit word needs to - /// fixup the data in big endian format. - inline void fixWord32BE(uint32_t W, uint32_t offset) { - Data[offset] = (uint8_t)(W >> 24); - Data[++offset] = (uint8_t)(W >> 16); - Data[++offset] = (uint8_t)(W >> 8); - Data[++offset] = (uint8_t)(W >> 0); - } - - /// fixWord64 - This callback is invoked when a 64-bit word needs to - /// fixup the data in correct endian format. - inline void fixWord64(uint64_t W, uint32_t offset) { - if (IsLittleEndian) - fixWord64LE(W, offset); - else - fixWord64BE(W, offset); - } - - /// fixWord64BE - This callback is invoked when a 64-bit word needs to - /// fixup the data in little endian format. - inline void fixWord64LE(uint64_t W, uint32_t offset) { - Data[offset] = (uint8_t)(W >> 0); - Data[++offset] = (uint8_t)(W >> 8); - Data[++offset] = (uint8_t)(W >> 16); - Data[++offset] = (uint8_t)(W >> 24); - Data[++offset] = (uint8_t)(W >> 32); - Data[++offset] = (uint8_t)(W >> 40); - Data[++offset] = (uint8_t)(W >> 48); - Data[++offset] = (uint8_t)(W >> 56); - } - - /// fixWord64BE - This callback is invoked when a 64-bit word needs to - /// fixup the data in big endian format. - inline void fixWord64BE(uint64_t W, uint32_t offset) { - Data[offset] = (uint8_t)(W >> 56); - Data[++offset] = (uint8_t)(W >> 48); - Data[++offset] = (uint8_t)(W >> 40); - Data[++offset] = (uint8_t)(W >> 32); - Data[++offset] = (uint8_t)(W >> 24); - Data[++offset] = (uint8_t)(W >> 16); - Data[++offset] = (uint8_t)(W >> 8); - Data[++offset] = (uint8_t)(W >> 0); - } - - /// emitAlignment - Pad the data to the specified alignment. - void emitAlignment(unsigned Alignment, uint8_t fill = 0) { - if (Alignment <= 1) return; - unsigned PadSize = -Data.size() & (Alignment-1); - for (unsigned i = 0; i>= 7; - if (Value) Byte |= 0x80; - emitByte(Byte); - } while (Value); - } - - /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be - /// written to the data stream. - void emitSLEB128Bytes(int64_t Value) { - int Sign = Value >> (8 * sizeof(Value) - 1); - bool IsMore; - - do { - uint8_t Byte = (uint8_t)(Value & 0x7f); - Value >>= 7; - IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; - if (IsMore) Byte |= 0x80; - emitByte(Byte); - } while (IsMore); - } - - /// emitString - This callback is invoked when a String needs to be - /// written to the data stream. - void emitString(const std::string &String) { - for (unsigned i = 0, N = static_cast(String.size()); i + +namespace llvm { + +class MCInstrDesc; +class MachineInstr; +class MachineLoopInfo; +class MachineDominatorTree; +class InstrItineraryData; +class DefaultVLIWScheduler; +class SUnit; + +class DFAPacketizer { +private: + typedef std::pair UnsignPair; + const InstrItineraryData *InstrItins; + int CurrentState; + const int (*DFAStateInputTable)[2]; + const unsigned *DFAStateEntryTable; + + // CachedTable is a map from to ToState. + DenseMap CachedTable; + + // ReadTable - Read the DFA transition table and update CachedTable. + void ReadTable(unsigned int state); + +public: + DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + const unsigned *SET); + + // Reset the current state to make all resources available. 
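The (now removed) BinaryObject helpers above include the standard ULEB128 emission loop: seven payload bits per byte, with bit 7 set while more bytes follow. A standalone sketch of that encoding, using the classic DWARF example value:

#include <cstdint>
#include <vector>
#include <cassert>

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = (uint8_t)(Value & 0x7f);
    Value >>= 7;
    if (Value)
      Byte |= 0x80;                  // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value);
}

int main() {
  std::vector<uint8_t> Out;
  emitULEB128(Out, 624485);          // encodes as 0xE5 0x8E 0x26
  assert(Out.size() == 3 && Out[0] == 0xE5 && Out[1] == 0x8E && Out[2] == 0x26);
  return 0;
}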
+ void clearResources() { + CurrentState = 0; + } + + // canReserveResources - Check if the resources occupied by a MCInstrDesc + // are available in the current state. + bool canReserveResources(const llvm::MCInstrDesc *MID); + + // reserveResources - Reserve the resources occupied by a MCInstrDesc and + // change the current state to reflect that change. + void reserveResources(const llvm::MCInstrDesc *MID); + + // canReserveResources - Check if the resources occupied by a machine + // instruction are available in the current state. + bool canReserveResources(llvm::MachineInstr *MI); + + // reserveResources - Reserve the resources occupied by a machine + // instruction and change the current state to reflect that change. + void reserveResources(llvm::MachineInstr *MI); + + const InstrItineraryData *getInstrItins() const { return InstrItins; } +}; + +// VLIWPacketizerList - Implements a simple VLIW packetizer using DFA. The +// packetizer works on machine basic blocks. For each instruction I in BB, the +// packetizer consults the DFA to see if machine resources are available to +// execute I. If so, the packetizer checks if I depends on any instruction J in +// the current packet. If no dependency is found, I is added to current packet +// and machine resource is marked as taken. If any dependency is found, a target +// API call is made to prune the dependence. +class VLIWPacketizerList { +protected: + const TargetMachine &TM; + const MachineFunction &MF; + const TargetInstrInfo *TII; + + // The VLIW Scheduler. + DefaultVLIWScheduler *VLIWScheduler; + + // Vector of instructions assigned to the current packet. + std::vector CurrentPacketMIs; + // DFA resource tracker. + DFAPacketizer *ResourceTracker; + + // Generate MI -> SU map. + std::map MIToSUnit; + +public: + VLIWPacketizerList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + bool IsPostRA); + + virtual ~VLIWPacketizerList(); + + // PacketizeMIs - Implement this API in the backend to bundle instructions. + void PacketizeMIs(MachineBasicBlock *MBB, + MachineBasicBlock::iterator BeginItr, + MachineBasicBlock::iterator EndItr); + + // getResourceTracker - return ResourceTracker + DFAPacketizer *getResourceTracker() {return ResourceTracker;} + + // addToPacket - Add MI to the current packet. + virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator MII = MI; + CurrentPacketMIs.push_back(MI); + ResourceTracker->reserveResources(MI); + return MII; + } + + // endPacket - End the current packet. + void endPacket(MachineBasicBlock *MBB, MachineInstr *MI); + + // initPacketizerState - perform initialization before packetizing + // an instruction. This function is supposed to be overrided by + // the target dependent packetizer. + virtual void initPacketizerState(void) { return; } + + // ignorePseudoInstruction - Ignore bundling of pseudo instructions. + virtual bool ignorePseudoInstruction(MachineInstr *I, + MachineBasicBlock *MBB) { + return false; + } + + // isSoloInstruction - return true if instruction MI can not be packetized + // with any other instruction, which means that MI itself is a packet. + virtual bool isSoloInstruction(MachineInstr *MI) { + return true; + } + + // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ + // together. + virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + return false; + } + + // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // and SUJ. 
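The packetizer described above walks a basic block, asks the DFA whether the current packet still has resources for the next instruction, and closes the packet when it does not. The following is only a toy standalone model of that driving loop, not code against DFAPacketizer/VLIWPacketizerList; the functional-unit assignments are invented for illustration.

#include <cstdio>
#include <vector>

struct ToyTracker {
  unsigned UsedUnits;                              // bit mask of functional units in use
  ToyTracker() : UsedUnits(0) {}
  bool canReserve(unsigned Unit) const { return (UsedUnits & (1u << Unit)) == 0; }
  void reserve(unsigned Unit) { UsedUnits |= 1u << Unit; }
  void clear() { UsedUnits = 0; }                  // analogous to clearResources() above
};

int main() {
  unsigned NeededUnit[] = { 0, 1, 0, 2 };          // functional unit required by each instruction
  ToyTracker Tracker;
  std::vector<std::vector<unsigned> > Packets(1);
  for (unsigned i = 0; i != 4; ++i) {
    if (!Tracker.canReserve(NeededUnit[i])) {      // no resources left: end packet, open a new one
      Packets.push_back(std::vector<unsigned>());
      Tracker.clear();
    }
    Tracker.reserve(NeededUnit[i]);                // add instruction i to the current packet
    Packets.back().push_back(i);
  }
  std::printf("%u packets\n", (unsigned)Packets.size());  // instructions {0,1} and {2,3}: 2 packets
  return 0;
}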
+ virtual bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + return false; + } + +}; +} + +#endif diff --git a/contrib/llvm/include/llvm/CodeGen/EdgeBundles.h b/contrib/llvm/include/llvm/CodeGen/EdgeBundles.h index 8aab3c64f170..a1d29b1f02c5 100644 --- a/contrib/llvm/include/llvm/CodeGen/EdgeBundles.h +++ b/contrib/llvm/include/llvm/CodeGen/EdgeBundles.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -61,7 +62,7 @@ class EdgeBundles : public MachineFunctionPass { /// Specialize WriteGraph, the standard implementation won't work. raw_ostream &WriteGraph(raw_ostream &O, const EdgeBundles &G, bool ShortNames = false, - const std::string &Title = ""); + const Twine &Title = ""); } // end namespace llvm diff --git a/contrib/llvm/include/llvm/CodeGen/FastISel.h b/contrib/llvm/include/llvm/CodeGen/FastISel.h index 18202d93b460..e57c8b18c63e 100644 --- a/contrib/llvm/include/llvm/CodeGen/FastISel.h +++ b/contrib/llvm/include/llvm/CodeGen/FastISel.h @@ -21,9 +21,11 @@ namespace llvm { class AllocaInst; +class Constant; class ConstantFP; class FunctionLoweringInfo; class Instruction; +class LoadInst; class MachineBasicBlock; class MachineConstantPool; class MachineFunction; @@ -36,7 +38,8 @@ class TargetLowering; class TargetMachine; class TargetRegisterClass; class TargetRegisterInfo; -class LoadInst; +class User; +class Value; /// FastISel - This is a fast-path instruction selection class that /// generates poor code and doesn't support illegal types or non-trivial @@ -358,6 +361,8 @@ class FastISel { bool SelectExtractValue(const User *I); + bool SelectInsertValue(const User *I); + /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. /// Emit code to ensure constants are copied into registers when needed. /// Remember the virtual registers that need to be added to the Machine PHI @@ -378,6 +383,10 @@ class FastISel { /// hasTrivialKill - Test whether the given value has exactly one use. bool hasTrivialKill(const Value *V) const; + + /// removeDeadCode - Remove all dead instructions between the I and E. + void removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E); }; } diff --git a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index 09dac8547a62..8cf22eca4fa6 100644 --- a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -21,10 +21,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -98,8 +96,8 @@ class FunctionLoweringInfo { MachineBasicBlock::iterator InsertPt; #ifndef NDEBUG - SmallSet CatchInfoLost; - SmallSet CatchInfoFound; + SmallPtrSet CatchInfoLost; + SmallPtrSet CatchInfoFound; #endif struct LiveOutInfo { @@ -112,7 +110,7 @@ class FunctionLoweringInfo { /// VisitedBBs - The set of basic blocks visited thus far by instruction /// selection. - DenseSet VisitedBBs; + SmallPtrSet VisitedBBs; /// PHINodesToUpdate - A list of phi instructions whose operand list will /// be updated after processing the current basic block. 
@@ -202,7 +200,7 @@ class FunctionLoweringInfo { /// setArgumentFrameIndex - Record frame index for the byval /// argument. void setArgumentFrameIndex(const Argument *A, int FI); - + /// getArgumentFrameIndex - Get frame index for the byval argument. int getArgumentFrameIndex(const Argument *A); @@ -211,16 +209,18 @@ class FunctionLoweringInfo { IndexedMap LiveOutRegInfo; }; +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void ComputeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo *MMI); + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB); -/// CopyCatchInfo - Copy catch information from SuccBB (or one of its -/// successors) to LPad. -void CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h index cd760dba92aa..1cbd36abfbf8 100644 --- a/contrib/llvm/include/llvm/CodeGen/GCStrategy.h +++ b/contrib/llvm/include/llvm/CodeGen/GCStrategy.h @@ -37,6 +37,7 @@ #define LLVM_CODEGEN_GCSTRATEGY_H #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Registry.h" #include @@ -68,6 +69,8 @@ namespace llvm { bool CustomReadBarriers; //< Default is to insert loads. bool CustomWriteBarriers; //< Default is to insert stores. bool CustomRoots; //< Default is to pass through to backend. + bool CustomSafePoints; //< Default is to use NeededSafePoints + // to find safe points. bool InitRoots; //< If set, roots are nulled during lowering. bool UsesMetadata; //< If set, backend must emit metadata tables. @@ -87,7 +90,9 @@ namespace llvm { /// needsSafePoitns - True if safe points of any kind are required. By // default, none are recorded. - bool needsSafePoints() const { return NeededSafePoints != 0; } + bool needsSafePoints() const { + return CustomSafePoints || NeededSafePoints != 0; + } /// needsSafePoint(Kind) - True if the given kind of safe point is // required. By default, none are recorded. @@ -109,6 +114,11 @@ namespace llvm { /// can generate a stack map. If true, then // performCustomLowering must delete them. bool customRoots() const { return CustomRoots; } + + /// customSafePoints - By default, the GC analysis will find safe + /// points according to NeededSafePoints. If true, + /// then findCustomSafePoints must create them. + bool customSafePoints() const { return CustomSafePoints; } /// initializeRoots - If set, gcroot intrinsics should initialize their // allocas to null before the first use. This is @@ -135,6 +145,7 @@ namespace llvm { /// which the LLVM IR can be modified. 
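The comment above says ComputeUsesVAFloatArgument checks whether any floating-point value is passed to a variadic call so that an undefined reference to _fltused can be emitted on Windows. As a hedged sketch (not the actual implementation), the core of such a check reduces to scanning the call's arguments for a floating-point type; the helper name is illustrative, the variadic-callee test is omitted, and the include path assumes the LLVM 3.x header layout.

#include "llvm/Instructions.h"   // CallInst; path as of the LLVM 3.x tree

static bool passesFloatArgument(const llvm::CallInst &I) {
  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i)
    if (I.getArgOperand(i)->getType()->isFloatingPointTy())
      return true;               // caller would then set MachineModuleInfo's usesVAFloatArgument flag
  return false;
}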
virtual bool initializeCustomLowering(Module &F); virtual bool performCustomLowering(Function &F); + virtual bool findCustomSafePoints(GCFunctionInfo& FI, MachineFunction& MF); }; } diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h index 184e96dc4766..ab8ab5dd7b4e 100644 --- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -57,7 +57,7 @@ namespace ISD { AssertSext, AssertZext, // Various leaf nodes. - BasicBlock, VALUETYPE, CONDCODE, Register, + BasicBlock, VALUETYPE, CONDCODE, Register, RegisterMask, Constant, ConstantFP, GlobalAddress, GlobalTLSAddress, FrameIndex, JumpTable, ConstantPool, ExternalSymbol, BlockAddress, @@ -107,13 +107,6 @@ namespace ISD { // and returns an outchain. EH_SJLJ_LONGJMP, - // OUTCHAIN = EH_SJLJ_DISPATCHSETUP(INCHAIN, setjmpval) - // This corresponds to the eh.sjlj.dispatchsetup intrinsic. It takes an - // input chain and the value returning from setjmp as inputs and returns an - // outchain. By default, this does nothing. Targets can lower this to unwind - // setup code if needed. - EH_SJLJ_DISPATCHSETUP, - // TargetConstant* - Like Constant*, but the DAG does not do any folding, // simplification, or lowering of the constant. They are used for constants // which are known to fit in the immediate fields of their users, or for @@ -319,6 +312,9 @@ namespace ISD { /// Byte Swap and Counting operators. BSWAP, CTTZ, CTLZ, CTPOP, + /// Bit counting operators with an undefined result for zero inputs. + CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF, + // Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not // i1 then the high bits must conform to getBooleanContents. SELECT, @@ -327,6 +323,9 @@ namespace ISD { // and #2), returning a vector result. All vectors have the same length. // Much like the scalar select and setcc, each bit in the condition selects // whether the corresponding result element is taken from op #1 or op #2. + // At first, the VSELECT condition is of vXi1 type. Later, targets may change + // the condition type in order to match the VSELECT node using a a pattern. + // The condition follows the BooleanContent format of the target. VSELECT, // Select with condition operator - This selects between a true value and diff --git a/contrib/llvm/include/llvm/CodeGen/JITCodeEmitter.h b/contrib/llvm/include/llvm/CodeGen/JITCodeEmitter.h index 88e22d6a24ce..89f00e91f78e 100644 --- a/contrib/llvm/include/llvm/CodeGen/JITCodeEmitter.h +++ b/contrib/llvm/include/llvm/CodeGen/JITCodeEmitter.h @@ -51,6 +51,7 @@ class Function; /// occurred, more memory is allocated, and we reemit the code into it. /// class JITCodeEmitter : public MachineCodeEmitter { + virtual void anchor(); public: virtual ~JITCodeEmitter() {} diff --git a/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h b/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h index 1ed2547ca6cf..8fb31aa8a6d1 100644 --- a/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/contrib/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -85,11 +85,11 @@ namespace llvm { virtual void dump(ScheduleDAG* DAG) const; - // ScheduledNode - As nodes are scheduled, we look to see if there are any + // scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. 
- void ScheduledNode(SUnit *Node); + void scheduledNode(SUnit *Node); private: void AdjustPriorityOfUnscheduledPreds(SUnit *SU); diff --git a/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h b/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h index 0271c5d85222..eb01f66c3129 100644 --- a/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h +++ b/contrib/llvm/include/llvm/CodeGen/LexicalScopes.h @@ -153,6 +153,7 @@ class LexicalScopes { /// LexicalScope - This class is used to track scope information. /// class LexicalScope { + virtual void anchor(); public: LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A) @@ -208,7 +209,7 @@ class LexicalScope { Parent->closeInsnRange(NewScope); } - /// dominates - Return true if current scope dominsates given lexical scope. + /// dominates - Return true if current scope dominates given lexical scope. bool dominates(const LexicalScope *S) const { if (S == this) return true; diff --git a/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h index 098dd0b3bf73..46dd004609f5 100644 --- a/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/contrib/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -31,24 +31,20 @@ namespace { if (std::getenv("bar") != (char*) -1) return; - (void) llvm::createDeadMachineInstructionElimPass(); - (void) llvm::createFastRegisterAllocator(); (void) llvm::createBasicRegisterAllocator(); - (void) llvm::createLinearScanRegisterAllocator(); (void) llvm::createGreedyRegisterAllocator(); (void) llvm::createDefaultPBQPRegisterAllocator(); llvm::linkOcamlGC(); llvm::linkShadowStackGC(); - + (void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default); - (void) llvm::createTDRRListDAGScheduler(NULL, llvm::CodeGenOpt::Default); (void) llvm::createSourceListDAGScheduler(NULL,llvm::CodeGenOpt::Default); (void) llvm::createHybridListDAGScheduler(NULL,llvm::CodeGenOpt::Default); - (void) llvm::createTDListDAGScheduler(NULL, llvm::CodeGenOpt::Default); (void) llvm::createFastDAGScheduler(NULL, llvm::CodeGenOpt::Default); (void) llvm::createDefaultScheduler(NULL, llvm::CodeGenOpt::Default); + (void) llvm::createVLIWDAGScheduler(NULL, llvm::CodeGenOpt::Default); } } ForceCodegenLinking; // Force link by creating a global definition. diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h index 2288c1a98b2d..a6008ab33761 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h @@ -43,12 +43,10 @@ namespace llvm { private: enum { HAS_PHI_KILL = 1, - REDEF_BY_EC = 1 << 1, - IS_PHI_DEF = 1 << 2, - IS_UNUSED = 1 << 3 + IS_PHI_DEF = 1 << 1, + IS_UNUSED = 1 << 2 }; - MachineInstr *copy; unsigned char flags; public: @@ -57,23 +55,22 @@ namespace llvm { /// The ID number of this value. unsigned id; - /// The index of the defining instruction (if isDefAccurate() returns true). + /// The index of the defining instruction. SlotIndex def; /// VNInfo constructor. - VNInfo(unsigned i, SlotIndex d, MachineInstr *c) - : copy(c), flags(0), id(i), def(d) + VNInfo(unsigned i, SlotIndex d) + : flags(0), id(i), def(d) { } /// VNInfo construtor, copies values from orig, except for the value number. VNInfo(unsigned i, const VNInfo &orig) - : copy(orig.copy), flags(orig.flags), id(i), def(orig.def) + : flags(orig.flags), id(i), def(orig.def) { } /// Copy from the parameter into this VNInfo. 
void copyFrom(VNInfo &src) { flags = src.flags; - copy = src.copy; def = src.def; } @@ -86,19 +83,6 @@ namespace llvm { flags = (flags | VNI->flags) & ~IS_UNUSED; } - /// For a register interval, if this VN was definied by a copy instr - /// getCopy() returns a pointer to it, otherwise returns 0. - /// For a stack interval the behaviour of this method is undefined. - MachineInstr* getCopy() const { return copy; } - /// For a register interval, set the copy member. - /// This method should not be called on stack intervals as it may lead to - /// undefined behavior. - void setCopy(MachineInstr *c) { copy = c; } - - /// isDefByCopy - Return true when this value was defined by a copy-like - /// instruction as determined by MachineInstr::isCopyLike. - bool isDefByCopy() const { return copy != 0; } - /// Returns true if one or more kills are PHI nodes. /// Obsolete, do not use! bool hasPHIKill() const { return flags & HAS_PHI_KILL; } @@ -110,17 +94,6 @@ namespace llvm { flags &= ~HAS_PHI_KILL; } - /// Returns true if this value is re-defined by an early clobber somewhere - /// during the live range. - bool hasRedefByEC() const { return flags & REDEF_BY_EC; } - /// Set the "redef by early clobber" flag on this value. - void setHasRedefByEC(bool hasRedef) { - if (hasRedef) - flags |= REDEF_BY_EC; - else - flags &= ~REDEF_BY_EC; - } - /// Returns true if this value is defined by a PHI instruction (or was, /// PHI instrucions may have been eliminated). bool isPHIDef() const { return flags & IS_PHI_DEF; } @@ -294,10 +267,9 @@ namespace llvm { /// getNextValue - Create a new value number and return it. MIIdx specifies /// the instruction that defines the value number. - VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI, - VNInfo::Allocator &VNInfoAllocator) { + VNInfo *getNextValue(SlotIndex def, VNInfo::Allocator &VNInfoAllocator) { VNInfo *VNI = - new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def, CopyMI); + new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def); valnos.push_back(VNI); return VNI; } @@ -381,7 +353,7 @@ namespace llvm { /// point is not contained in the half-open live range. It is usually the /// getDefIndex() slot following its last use. bool killedAt(SlotIndex index) const { - const_iterator r = find(index.getUseIndex()); + const_iterator r = find(index.getRegSlot(true)); return r != end() && r->end == index; } @@ -405,6 +377,14 @@ namespace llvm { return I == end() ? 0 : &*I; } + const LiveRange *getLiveRangeBefore(SlotIndex Idx) const { + return getLiveRangeContaining(Idx.getPrevSlot()); + } + + LiveRange *getLiveRangeBefore(SlotIndex Idx) { + return getLiveRangeContaining(Idx.getPrevSlot()); + } + /// getVNInfoAt - Return the VNInfo that is live at Idx, or NULL. VNInfo *getVNInfoAt(SlotIndex Idx) const { const_iterator I = FindLiveRangeContaining(Idx); diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h index 8ca58b82c8bb..76201c96f915 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -63,8 +63,34 @@ namespace llvm { /// allocatableRegs_ - A bit vector of allocatable registers. BitVector allocatableRegs_; - /// CloneMIs - A list of clones as result of re-materialization. - std::vector CloneMIs; + /// reservedRegs_ - A bit vector of reserved registers. + BitVector reservedRegs_; + + /// RegMaskSlots - Sorted list of instructions with register mask operands. 
+ /// Always use the 'r' slot, RegMasks are normal clobbers, not early + /// clobbers. + SmallVector RegMaskSlots; + + /// RegMaskBits - This vector is parallel to RegMaskSlots, it holds a + /// pointer to the corresponding register mask. This pointer can be + /// recomputed as: + /// + /// MI = Indexes->getInstructionFromIndex(RegMaskSlot[N]); + /// unsigned OpNum = findRegMaskOperand(MI); + /// RegMaskBits[N] = MI->getOperand(OpNum).getRegMask(); + /// + /// This is kept in a separate vector partly because some standard + /// libraries don't support lower_bound() with mixed objects, partly to + /// improve locality when searching in RegMaskSlots. + /// Also see the comment in LiveInterval::find(). + SmallVector RegMaskBits; + + /// For each basic block number, keep (begin, size) pairs indexing into the + /// RegMaskSlots and RegMaskBits arrays. + /// Note that basic block numbers may not be layout contiguous, that's why + /// we can't just keep track of the first register mask in each basic + /// block. + SmallVector, 8> RegMaskBlocks; public: static char ID; // Pass identification, replacement for typeid @@ -105,6 +131,12 @@ namespace llvm { return allocatableRegs_.test(reg); } + /// isReserved - is the physical register reg reserved in the current + /// function + bool isReserved(unsigned reg) const { + return reservedRegs_.test(reg); + } + /// getScaledIntervalSize - get the size of an interval in "units," /// where every function is composed of one thousand units. This /// measure scales properly with empty index slots in the function. @@ -125,19 +157,6 @@ namespace llvm { return (unsigned)(IntervalPercentage * indexes_->getFunctionSize()); } - /// conflictsWithPhysReg - Returns true if the specified register is used or - /// defined during the duration of the specified interval. Copies to and - /// from li.reg are allowed. This method is only able to analyze simple - /// ranges that stay within a single basic block. Anything else is - /// considered a conflict. - bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm, - unsigned reg); - - /// conflictsWithAliasRef - Similar to conflictsWithPhysRegRef except - /// it checks for alias uses and defs. - bool conflictsWithAliasRef(LiveInterval &li, unsigned Reg, - SmallPtrSet &JoinedCopies); - // Interval creation LiveInterval &getOrCreateInterval(unsigned reg) { Reg2IntervalMap::iterator I = r2iMap_.find(reg); @@ -177,14 +196,6 @@ namespace llvm { return indexes_; } - SlotIndex getZeroIndex() const { - return indexes_->getZeroIndex(); - } - - SlotIndex getInvalidIndex() const { - return indexes_->getInvalidIndex(); - } - /// isNotInMIMap - returns true if the specified machine instr has been /// removed or was never entered in the map. 
bool isNotInMIMap(const MachineInstr* Instr) const { @@ -216,21 +227,11 @@ namespace llvm { return li.liveAt(getMBBStartIdx(mbb)); } - LiveRange* findEnteringRange(LiveInterval &li, - const MachineBasicBlock *mbb) { - return li.getLiveRangeContaining(getMBBStartIdx(mbb)); - } - bool isLiveOutOfMBB(const LiveInterval &li, const MachineBasicBlock *mbb) const { return li.liveAt(getMBBEndIdx(mbb).getPrevSlot()); } - LiveRange* findExitingRange(LiveInterval &li, - const MachineBasicBlock *mbb) { - return li.getLiveRangeContaining(getMBBEndIdx(mbb).getPrevSlot()); - } - MachineBasicBlock* getMBBFromIndex(SlotIndex index) const { return indexes_->getMBBFromIndex(index); } @@ -247,19 +248,11 @@ namespace llvm { indexes_->replaceMachineInstrInMaps(MI, NewMI); } - void InsertMBBInMaps(MachineBasicBlock *MBB) { - indexes_->insertMBBInMaps(MBB); - } - bool findLiveInMBBs(SlotIndex Start, SlotIndex End, SmallVectorImpl &MBBs) const { return indexes_->findLiveInMBBs(Start, End, MBBs); } - void renumber() { - indexes_->renumberIndexes(); - } - VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; } virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -271,20 +264,6 @@ namespace llvm { /// print - Implement the dump method. virtual void print(raw_ostream &O, const Module* = 0) const; - /// addIntervalsForSpills - Create new intervals for spilled defs / uses of - /// the given interval. FIXME: It also returns the weight of the spill slot - /// (if any is created) by reference. This is temporary. - std::vector - addIntervalsForSpills(const LiveInterval& i, - const SmallVectorImpl *SpillIs, - const MachineLoopInfo *loopInfo, VirtRegMap& vrm); - - /// spillPhysRegAroundRegDefsUses - Spill the specified physical register - /// around all defs and uses of the specified interval. Return true if it - /// was able to cut its interval. - bool spillPhysRegAroundRegDefsUses(const LiveInterval &li, - unsigned PhysReg, VirtRegMap &vrm); - /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. Also returns true /// by reference if all of the defs are load instructions. @@ -292,34 +271,72 @@ namespace llvm { const SmallVectorImpl *SpillIs, bool &isLoad); - /// isReMaterializable - Returns true if the definition MI of the specified - /// val# of the specified interval is re-materializable. - bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, - MachineInstr *MI); - - /// getRepresentativeReg - Find the largest super register of the specified - /// physical register. - unsigned getRepresentativeReg(unsigned Reg) const; - - /// getNumConflictsWithPhysReg - Return the number of uses and defs of the - /// specified interval that conflicts with the specified physical register. - unsigned getNumConflictsWithPhysReg(const LiveInterval &li, - unsigned PhysReg) const; - - /// intervalIsInOneMBB - Returns true if the specified interval is entirely - /// within a single basic block. - bool intervalIsInOneMBB(const LiveInterval &li) const; - - /// getLastSplitPoint - Return the last possible insertion point in mbb for - /// spilling and splitting code. This is the first terminator, or the call - /// instruction if li is live into a landing pad successor. - MachineBasicBlock::iterator getLastSplitPoint(const LiveInterval &li, - MachineBasicBlock *mbb) const; + /// intervalIsInOneMBB - If LI is confined to a single basic block, return + /// a pointer to that block. If LI is live in to or out of any block, + /// return NULL. 
+ MachineBasicBlock *intervalIsInOneMBB(const LiveInterval &LI) const; /// addKillFlags - Add kill flags to any instruction that kills a virtual /// register. void addKillFlags(); + /// handleMove - call this method to notify LiveIntervals that + /// instruction 'mi' has been moved within a basic block. This will update + /// the live intervals for all operands of mi. Moves between basic blocks + /// are not supported. + void handleMove(MachineInstr* MI); + + /// moveIntoBundle - Update intervals for operands of MI so that they + /// begin/end on the SlotIndex for BundleStart. + /// + /// Requires MI and BundleStart to have SlotIndexes, and assumes + /// existing liveness is accurate. BundleStart should be the first + /// instruction in the Bundle. + void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart); + + // Register mask functions. + // + // Machine instructions may use a register mask operand to indicate that a + // large number of registers are clobbered by the instruction. This is + // typically used for calls. + // + // For compile time performance reasons, these clobbers are not recorded in + // the live intervals for individual physical registers. Instead, + // LiveIntervalAnalysis maintains a sorted list of instructions with + // register mask operands. + + /// getRegMaskSlots - Returns a sorted array of slot indices of all + /// instructions with register mask operands. + ArrayRef getRegMaskSlots() const { return RegMaskSlots; } + + /// getRegMaskSlotsInBlock - Returns a sorted array of slot indices of all + /// instructions with register mask operands in the basic block numbered + /// MBBNum. + ArrayRef getRegMaskSlotsInBlock(unsigned MBBNum) const { + std::pair P = RegMaskBlocks[MBBNum]; + return getRegMaskSlots().slice(P.first, P.second); + } + + /// getRegMaskBits() - Returns an array of register mask pointers + /// corresponding to getRegMaskSlots(). + ArrayRef getRegMaskBits() const { return RegMaskBits; } + + /// getRegMaskBitsInBlock - Returns an array of mask pointers corresponding + /// to getRegMaskSlotsInBlock(MBBNum). + ArrayRef getRegMaskBitsInBlock(unsigned MBBNum) const { + std::pair P = RegMaskBlocks[MBBNum]; + return getRegMaskBits().slice(P.first, P.second); + } + + /// checkRegMaskInterference - Test if LI is live across any register mask + /// instructions, and compute a bit mask of physical registers that are not + /// clobbered by any of them. + /// + /// Returns false if LI doesn't cross any register mask instructions. In + /// that case, the bit vector is not filled in. + bool checkRegMaskInterference(LiveInterval &LI, + BitVector &UsableRegs); + private: /// computeIntervals - Compute live intervals. void computeIntervals(); @@ -351,13 +368,12 @@ namespace llvm { void handlePhysicalRegisterDef(MachineBasicBlock* mbb, MachineBasicBlock::iterator mi, SlotIndex MIIdx, MachineOperand& MO, - LiveInterval &interval, - MachineInstr *CopyMI); + LiveInterval &interval); /// handleLiveInRegister - Create interval for a livein register. 
void handleLiveInRegister(MachineBasicBlock* mbb, SlotIndex MIIdx, - LiveInterval &interval, bool isAlias = false); + LiveInterval &interval); /// getReMatImplicitUse - If the remat definition MI has one (for now, we /// only allow one) virtual register operand, then its uses are implicitly @@ -379,88 +395,12 @@ namespace llvm { const SmallVectorImpl *SpillIs, bool &isLoad); - /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from - /// slot / to reg or any rematerialized load into ith operand of specified - /// MI. If it is successul, MI is updated with the newly created MI and - /// returns true. - bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm, - MachineInstr *DefMI, SlotIndex InstrIdx, - SmallVector &Ops, - bool isSS, int FrameIndex, unsigned Reg); - - /// canFoldMemoryOperand - Return true if the specified load / store - /// folding is possible. - bool canFoldMemoryOperand(MachineInstr *MI, - SmallVector &Ops, - bool ReMatLoadSS) const; - - /// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified - /// VNInfo that's after the specified index but is within the basic block. - bool anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, - MachineBasicBlock *MBB, - SlotIndex Idx) const; - - /// hasAllocatableSuperReg - Return true if the specified physical register - /// has any super register that's allocatable. - bool hasAllocatableSuperReg(unsigned Reg) const; - - /// SRInfo - Spill / restore info. - struct SRInfo { - SlotIndex index; - unsigned vreg; - bool canFold; - SRInfo(SlotIndex i, unsigned vr, bool f) - : index(i), vreg(vr), canFold(f) {} - }; - - bool alsoFoldARestore(int Id, SlotIndex index, unsigned vr, - BitVector &RestoreMBBs, - DenseMap >&RestoreIdxes); - void eraseRestoreInfo(int Id, SlotIndex index, unsigned vr, - BitVector &RestoreMBBs, - DenseMap >&RestoreIdxes); - - /// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being - /// spilled and create empty intervals for their uses. - void handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, - const TargetRegisterClass* rc, - std::vector &NewLIs); - - /// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of - /// interval on to-be re-materialized operands of MI) with new register. - void rewriteImplicitOps(const LiveInterval &li, - MachineInstr *MI, unsigned NewVReg, VirtRegMap &vrm); - - /// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper - /// functions for addIntervalsForSpills to rewrite uses / defs for the given - /// live range. 
- bool rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, - MachineInstr *MI, MachineInstr *OrigDefMI, MachineInstr *DefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, const TargetRegisterClass* rc, - SmallVector &ReMatIds, const MachineLoopInfo *loopInfo, - unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, - DenseMap &MBBVRegsMap, - std::vector &NewLIs); - void rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, - LiveInterval::Ranges::const_iterator &I, - MachineInstr *OrigDefMI, MachineInstr *DefMI, unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, const TargetRegisterClass* rc, - SmallVector &ReMatIds, const MachineLoopInfo *loopInfo, - BitVector &SpillMBBs, - DenseMap > &SpillIdxes, - BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes, - DenseMap &MBBVRegsMap, - std::vector &NewLIs); - static LiveInterval* createInterval(unsigned Reg); void printInstrs(raw_ostream &O) const; void dumpInstrs() const; + + class HMEditor; }; } // End llvm namespace diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h similarity index 79% rename from contrib/llvm/lib/CodeGen/LiveRangeEdit.h rename to contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h index 9b0a671ea9e5..57a619389fa5 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -21,6 +21,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { @@ -33,7 +34,9 @@ class VirtRegMap; class LiveRangeEdit { public: /// Callback methods for LiveRangeEdit owners. - struct Delegate { + class Delegate { + virtual void anchor(); + public: /// Called immediately before erasing a dead machine instruction. virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} @@ -54,8 +57,11 @@ class LiveRangeEdit { private: LiveInterval &parent_; SmallVectorImpl &newRegs_; + MachineRegisterInfo &MRI; + LiveIntervals &LIS; + VirtRegMap *VRM; + const TargetInstrInfo &TII; Delegate *const delegate_; - const SmallVectorImpl *uselessRegs_; /// firstNew_ - Index of the first register added to newRegs_. const unsigned firstNew_; @@ -72,34 +78,37 @@ class LiveRangeEdit { SmallPtrSet rematted_; /// scanRemattable - Identify the parent_ values that may rematerialize. - void scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa); + void scanRemattable(AliasAnalysis *aa); /// allUsesAvailableAt - Return true if all registers used by OrigMI at /// OrigIdx are also available with the same value at UseIdx. bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, LiveIntervals &lis); + SlotIndex UseIdx); /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead, - MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); + bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead); public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. /// @param newRegs List to receive any new registers created. 
This needn't be /// empty initially, any existing registers are ignored. - /// @param uselessRegs List of registers that can't be used when - /// rematerializing values because they are about to be removed. + /// @param MF The MachineFunction the live range edit is taking place in. + /// @param lis The collection of all live intervals in this function. + /// @param vrm Map of virtual registers to physical registers for this + /// function. If NULL, no virtual register map updates will + /// be done. This could be the case if called before Regalloc. LiveRangeEdit(LiveInterval &parent, SmallVectorImpl &newRegs, - Delegate *delegate = 0, - const SmallVectorImpl *uselessRegs = 0) + MachineFunction &MF, + LiveIntervals &lis, + VirtRegMap *vrm, + Delegate *delegate = 0) : parent_(parent), newRegs_(newRegs), + MRI(MF.getRegInfo()), LIS(lis), VRM(vrm), + TII(*MF.getTarget().getInstrInfo()), delegate_(delegate), - uselessRegs_(uselessRegs), firstNew_(newRegs.size()), scannedRemattable_(false) {} @@ -118,32 +127,24 @@ class LiveRangeEdit { return makeArrayRef(newRegs_).slice(firstNew_); } - /// FIXME: Temporary accessors until we can get rid of - /// LiveIntervals::AddIntervalsForSpills - SmallVectorImpl *getNewVRegs() { return &newRegs_; } - const SmallVectorImpl *getUselessVRegs() { - return uselessRegs_; - } - /// createFrom - Create a new virtual register based on OldReg. - LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); + LiveInterval &createFrom(unsigned OldReg); /// create - Create a new register with the same class and original slot as /// parent. - LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { - return createFrom(getReg(), LIS, VRM); + LiveInterval &create() { + return createFrom(getReg()); } /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, - AliasAnalysis*); + bool anyRematerializable(AliasAnalysis*); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo&, AliasAnalysis*); + AliasAnalysis*); /// Remat - Information needed to rematerialize at a specific location. struct Remat { @@ -157,8 +158,7 @@ class LiveRangeEdit { /// When cheapAsAMove is set, only cheap remats are allowed. bool canRematerializeAt(Remat &RM, SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis); + bool cheapAsAMove); /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an /// instruction into MBB before MI. The new instruction is mapped, but @@ -168,8 +168,6 @@ class LiveRangeEdit { MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, - LiveIntervals&, - const TargetInstrInfo&, const TargetRegisterInfo&, bool Late = false); @@ -186,18 +184,21 @@ class LiveRangeEdit { /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try /// to erase it from LIS. - void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); + void eraseVirtReg(unsigned Reg); /// eliminateDeadDefs - Try to delete machine instructions that are now dead /// (allDefsAreDead returns true). This may cause live intervals to be trimmed /// and further dead efs to be eliminated. + /// RegsBeingSpilled lists registers currently being spilled by the register + /// allocator. 
These registers should not be split into new intervals + /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl &Dead, - LiveIntervals&, VirtRegMap&, - const TargetInstrInfo&); + ArrayRef RegsBeingSpilled + = ArrayRef()); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. - void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, + void calculateRegClassAndHint(MachineFunction&, const MachineLoopInfo&); }; diff --git a/contrib/llvm/include/llvm/CodeGen/LiveVariables.h b/contrib/llvm/include/llvm/CodeGen/LiveVariables.h index 7ba901fc28a4..d4bb409e0605 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveVariables.h @@ -85,17 +85,11 @@ class LiveVariables : public MachineFunctionPass { /// SparseBitVector<> AliveBlocks; - /// NumUses - Number of uses of this register across the entire function. - /// - unsigned NumUses; - /// Kills - List of MachineInstruction's which are the last use of this /// virtual register (kill it) in their basic block. /// std::vector Kills; - VarInfo() : NumUses(0) {} - /// removeKill - Delete a kill corresponding to the specified /// machine instruction. Returns true if there was a kill /// corresponding to this instruction, false otherwise. @@ -166,6 +160,9 @@ class LiveVariables : public MachineFunctionPass { /// the last use of the whole register. bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI); + /// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask. + void HandleRegMask(const MachineOperand&); + void HandlePhysRegUse(unsigned Reg, MachineInstr *MI); void HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallVector &Defs); diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 5a20e952b9cc..ef9c0c200584 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -77,6 +77,7 @@ class MachineBasicBlock : public ilist_node { /// (disable optimization). std::vector Weights; typedef std::vector::iterator weight_iterator; + typedef std::vector::const_iterator const_weight_iterator; /// LiveIns - Keep track of the physical registers that are livein of /// the basicblock. @@ -84,8 +85,9 @@ class MachineBasicBlock : public ilist_node { /// Alignment - Alignment of the basic block. Zero if the basic block does /// not need to be aligned. + /// The alignment is specified as log2(bytes). unsigned Alignment; - + /// IsLandingPad - Indicate that this basic block is entered via an /// exception handler. bool IsLandingPad; @@ -115,6 +117,10 @@ class MachineBasicBlock : public ilist_node { /// "(null)". StringRef getName() const; + /// getFullName - Return a formatted string to identify this block and its + /// parent function. + std::string getFullName() const; + /// hasAddressTaken - Test whether this block is potentially the target /// of an indirect branch. 
bool hasAddressTaken() const { return AddressTaken; } @@ -128,10 +134,89 @@ class MachineBasicBlock : public ilist_node { const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - typedef Instructions::iterator iterator; - typedef Instructions::const_iterator const_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef std::reverse_iterator reverse_iterator; + + /// bundle_iterator - MachineBasicBlock iterator that automatically skips over + /// MIs that are inside bundles (i.e. walk top level MIs only). + template + class bundle_iterator + : public std::iterator { + IterTy MII; + + public: + bundle_iterator(IterTy mii) : MII(mii) { + assert(!MII->isInsideBundle() && + "It's not legal to initialize bundle_iterator with a bundled MI"); + } + + bundle_iterator(Ty &mi) : MII(mi) { + assert(!mi.isInsideBundle() && + "It's not legal to initialize bundle_iterator with a bundled MI"); + } + bundle_iterator(Ty *mi) : MII(mi) { + assert((!mi || !mi->isInsideBundle()) && + "It's not legal to initialize bundle_iterator with a bundled MI"); + } + bundle_iterator(const bundle_iterator &I) : MII(I.MII) {} + bundle_iterator() : MII(0) {} + + Ty &operator*() const { return *MII; } + Ty *operator->() const { return &operator*(); } + + operator Ty*() const { return MII; } + + bool operator==(const bundle_iterator &x) const { + return MII == x.MII; + } + bool operator!=(const bundle_iterator &x) const { + return !operator==(x); + } + + // Increment and decrement operators... + bundle_iterator &operator--() { // predecrement - Back up + do { + --MII; + } while (MII->isInsideBundle()); + return *this; + } + bundle_iterator &operator++() { // preincrement - Advance + do { + ++MII; + } while (MII->isInsideBundle()); + return *this; + } + bundle_iterator operator--(int) { // postdecrement operators... + bundle_iterator tmp = *this; + do { + --MII; + } while (MII->isInsideBundle()); + return tmp; + } + bundle_iterator operator++(int) { // postincrement operators... 
+ bundle_iterator tmp = *this; + do { + ++MII; + } while (MII->isInsideBundle()); + return tmp; + } + + IterTy getInstrIterator() const { + return MII; + } + }; + + typedef Instructions::iterator instr_iterator; + typedef Instructions::const_iterator const_instr_iterator; + typedef std::reverse_iterator reverse_instr_iterator; + typedef + std::reverse_iterator const_reverse_instr_iterator; + + typedef + bundle_iterator iterator; + typedef + bundle_iterator const_iterator; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + unsigned size() const { return (unsigned)Insts.size(); } bool empty() const { return Insts.empty(); } @@ -141,15 +226,53 @@ class MachineBasicBlock : public ilist_node { const MachineInstr& front() const { return Insts.front(); } const MachineInstr& back() const { return Insts.back(); } + instr_iterator instr_begin() { return Insts.begin(); } + const_instr_iterator instr_begin() const { return Insts.begin(); } + instr_iterator instr_end() { return Insts.end(); } + const_instr_iterator instr_end() const { return Insts.end(); } + reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); } + const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); } + reverse_instr_iterator instr_rend () { return Insts.rend(); } + const_reverse_instr_iterator instr_rend () const { return Insts.rend(); } + iterator begin() { return Insts.begin(); } const_iterator begin() const { return Insts.begin(); } - iterator end() { return Insts.end(); } - const_iterator end() const { return Insts.end(); } - reverse_iterator rbegin() { return Insts.rbegin(); } - const_reverse_iterator rbegin() const { return Insts.rbegin(); } + iterator end() { + instr_iterator II = instr_end(); + if (II != instr_begin()) { + while (II->isInsideBundle()) + --II; + } + return II; + } + const_iterator end() const { + const_instr_iterator II = instr_end(); + if (II != instr_begin()) { + while (II->isInsideBundle()) + --II; + } + return II; + } + reverse_iterator rbegin() { + reverse_instr_iterator II = instr_rbegin(); + if (II != instr_rend()) { + while (II->isInsideBundle()) + ++II; + } + return II; + } + const_reverse_iterator rbegin() const { + const_reverse_instr_iterator II = instr_rbegin(); + if (II != instr_rend()) { + while (II->isInsideBundle()) + ++II; + } + return II; + } reverse_iterator rend () { return Insts.rend(); } const_reverse_iterator rend () const { return Insts.rend(); } + // Machine-CFG iterators typedef std::vector::iterator pred_iterator; typedef std::vector::const_iterator const_pred_iterator; @@ -219,10 +342,12 @@ class MachineBasicBlock : public ilist_node { bool livein_empty() const { return LiveIns.empty(); } /// getAlignment - Return alignment of the basic block. + /// The alignment is specified as log2(bytes). /// unsigned getAlignment() const { return Alignment; } /// setAlignment - Set alignment of the basic block. + /// The alignment is specified as log2(bytes). /// void setAlignment(unsigned Align) { Alignment = Align; } @@ -239,7 +364,7 @@ class MachineBasicBlock : public ilist_node { const MachineBasicBlock *getLandingPadSuccessor() const; // Code Layout methods. - + /// moveBefore/moveAfter - move 'this' block before or after the specified /// block. This only moves the block, it does not modify the CFG or adjust /// potential fall-throughs at the end of the block. 
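For readers tracking the bundle work above: after this import MachineBasicBlock carries two iterator families. begin()/end() (now bundle_iterator wrappers) walk only top-level instructions, so a finalized bundle is visited once, while the new instr_begin()/instr_end() visit every MachineInstr, bundled or not. The sketch below is illustrative only, assuming an LLVM 3.1-era tree; countInstrs is a made-up helper, not part of the patch.

// Sketch: count top-level vs. total instructions in a block using the
// bundle-aware iterators introduced above (C++03 style, as in this tree).
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <utility>

// Hypothetical helper for illustration only.
static std::pair<unsigned, unsigned> countInstrs(llvm::MachineBasicBlock &MBB) {
  unsigned TopLevel = 0, All = 0;
  // begin()/end() skip MIs flagged InsideBundle, so a finalized bundle
  // counts as a single unit here.
  for (llvm::MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I)
    ++TopLevel;
  // instr_begin()/instr_end() walk every MachineInstr, bundled or not.
  for (llvm::MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
       E = MBB.instr_end(); I != E; ++I)
    ++All;
  return std::make_pair(TopLevel, All);
}

The same split shows up below in the instr_ variants of the terminator, erase and splice APIs.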
@@ -286,7 +411,7 @@ class MachineBasicBlock : public ilist_node { /// in transferSuccessors, and update PHI operands in the successor blocks /// which refer to fromMBB to refer to this. void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB); - + /// isSuccessor - Return true if the specified MBB is a successor of this /// block. bool isSuccessor(const MachineBasicBlock *MBB) const; @@ -304,7 +429,7 @@ class MachineBasicBlock : public ilist_node { /// branch to do so (e.g., a table jump). True is a conservative answer. bool canFallThrough(); - /// Returns a pointer to the first instructon in this block that is not a + /// Returns a pointer to the first instructon in this block that is not a /// PHINode instruction. When adding instruction to the beginning of the /// basic block, they should be added before the returned value, not before /// the first instruction, which might be PHI. @@ -320,18 +445,16 @@ class MachineBasicBlock : public ilist_node { /// instruction of this basic block. If a terminator does not exist, /// it returns end() iterator getFirstTerminator(); + const_iterator getFirstTerminator() const; - const_iterator getFirstTerminator() const { - return const_cast(this)->getFirstTerminator(); - } + /// getFirstInstrTerminator - Same getFirstTerminator but it ignores bundles + /// and return an instr_iterator instead. + instr_iterator getFirstInstrTerminator(); /// getLastNonDebugInstr - returns an iterator to the last non-debug /// instruction in the basic block, or end() iterator getLastNonDebugInstr(); - - const_iterator getLastNonDebugInstr() const { - return const_cast(this)->getLastNonDebugInstr(); - } + const_iterator getLastNonDebugInstr() const; /// SplitCriticalEdge - Split the critical edge from this block to the /// given successor block, and return the newly created block, or null @@ -344,38 +467,88 @@ class MachineBasicBlock : public ilist_node { void pop_front() { Insts.pop_front(); } void pop_back() { Insts.pop_back(); } void push_back(MachineInstr *MI) { Insts.push_back(MI); } + template - void insert(iterator I, IT S, IT E) { Insts.insert(I, S, E); } - iterator insert(iterator I, MachineInstr *M) { return Insts.insert(I, M); } - iterator insertAfter(iterator I, MachineInstr *M) { - return Insts.insertAfter(I, M); + void insert(instr_iterator I, IT S, IT E) { + Insts.insert(I, S, E); + } + instr_iterator insert(instr_iterator I, MachineInstr *M) { + return Insts.insert(I, M); + } + instr_iterator insertAfter(instr_iterator I, MachineInstr *M) { + return Insts.insertAfter(I, M); } - // erase - Remove the specified element or range from the instruction list. - // These functions delete any instructions removed. - // - iterator erase(iterator I) { return Insts.erase(I); } - iterator erase(iterator I, iterator E) { return Insts.erase(I, E); } - MachineInstr *remove(MachineInstr *I) { return Insts.remove(I); } - void clear() { Insts.clear(); } + template + void insert(iterator I, IT S, IT E) { + Insts.insert(I.getInstrIterator(), S, E); + } + iterator insert(iterator I, MachineInstr *M) { + return Insts.insert(I.getInstrIterator(), M); + } + iterator insertAfter(iterator I, MachineInstr *M) { + return Insts.insertAfter(I.getInstrIterator(), M); + } + + /// erase - Remove the specified element or range from the instruction list. + /// These functions delete any instructions removed. 
+ /// + instr_iterator erase(instr_iterator I) { + return Insts.erase(I); + } + instr_iterator erase(instr_iterator I, instr_iterator E) { + return Insts.erase(I, E); + } + instr_iterator erase_instr(MachineInstr *I) { + instr_iterator MII(I); + return erase(MII); + } + + iterator erase(iterator I); + iterator erase(iterator I, iterator E) { + return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); + } + iterator erase(MachineInstr *I) { + iterator MII(I); + return erase(MII); + } + + /// remove - Remove the instruction from the instruction list. This function + /// does not delete the instruction. WARNING: Note, if the specified + /// instruction is a bundle this function will remove all the bundled + /// instructions as well. It is up to the caller to keep a list of the + /// bundled instructions and re-insert them if desired. This function is + /// *not recommended* for manipulating instructions with bundles. Use + /// splice instead. + MachineInstr *remove(MachineInstr *I); + void clear() { + Insts.clear(); + } /// splice - Take an instruction from MBB 'Other' at the position From, /// and insert it into this MBB right before 'where'. - void splice(iterator where, MachineBasicBlock *Other, iterator From) { + void splice(instr_iterator where, MachineBasicBlock *Other, + instr_iterator From) { Insts.splice(where, Other->Insts, From); } + void splice(iterator where, MachineBasicBlock *Other, iterator From); /// splice - Take a block of instructions from MBB 'Other' in the range [From, /// To), and insert them into this MBB right before 'where'. + void splice(instr_iterator where, MachineBasicBlock *Other, instr_iterator From, + instr_iterator To) { + Insts.splice(where, Other->Insts, From, To); + } void splice(iterator where, MachineBasicBlock *Other, iterator From, iterator To) { - Insts.splice(where, Other->Insts, From, To); + Insts.splice(where.getInstrIterator(), Other->Insts, + From.getInstrIterator(), To.getInstrIterator()); } /// removeFromParent - This method unlinks 'this' from the containing /// function, and returns it, but does not delete it. MachineBasicBlock *removeFromParent(); - + /// eraseFromParent - This method unlinks 'this' from the containing /// function and deletes it. void eraseFromParent(); @@ -396,7 +569,10 @@ class MachineBasicBlock : public ilist_node { /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping /// any DBG_VALUE instructions. Return UnknownLoc if there is none. - DebugLoc findDebugLoc(MachineBasicBlock::iterator &MBBI); + DebugLoc findDebugLoc(instr_iterator MBBI); + DebugLoc findDebugLoc(iterator MBBI) { + return findDebugLoc(MBBI.getInstrIterator()); + } // Debugging methods. void dump() const; @@ -418,13 +594,14 @@ class MachineBasicBlock : public ilist_node { /// getWeightIterator - Return weight iterator corresponding to the I /// successor iterator. weight_iterator getWeightIterator(succ_iterator I); + const_weight_iterator getWeightIterator(const_succ_iterator I) const; friend class MachineBranchProbabilityInfo; /// getSuccWeight - Return weight of the edge from this block to MBB. This /// method should NOT be called directly, but by using getEdgeWeight method /// from MachineBranchProbabilityInfo class. - uint32_t getSuccWeight(MachineBasicBlock *succ); + uint32_t getSuccWeight(const MachineBasicBlock *succ) const; // Methods used to maintain doubly linked list of blocks... 
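One practical note on the MachineBasicBlock hunks just above: remove() now pulls out an entire bundle and is explicitly discouraged for bundled code, with splice() recommended instead for moving instructions. A rough sketch of that pattern, again assuming an LLVM 3.1-era tree; moveToEnd is an illustrative name only, not part of the patch.

// Sketch: move a top-level instruction to the end of its block by splicing,
// rather than remove()+insert(), per the guidance in the comments above.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper for illustration only; MI must not be inside a bundle.
static void moveToEnd(llvm::MachineBasicBlock &MBB, llvm::MachineInstr *MI) {
  llvm::MachineBasicBlock::iterator From(MI);
  // splice(where, Other, From) unlinks the instruction at From and re-links
  // it before 'where'; here source and destination are the same block.
  MBB.splice(MBB.end(), &MBB, From);
}

Splicing keeps the instruction owned by a block at all times, which is why the bundle-aware API steers callers toward it.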
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index 416d40bf3098..a9c7bf7dbc60 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -20,6 +20,7 @@ namespace llvm { +class MachineBasicBlock; class MachineBranchProbabilityInfo; template class BlockFrequencyImpl; @@ -28,7 +29,8 @@ class BlockFrequencyImpl; /// machine basic block frequencies. class MachineBlockFrequencyInfo : public MachineFunctionPass { - BlockFrequencyImpl *MBFI; + BlockFrequencyImpl *MBFI; public: static char ID; @@ -46,7 +48,7 @@ class MachineBlockFrequencyInfo : public MachineFunctionPass { /// that we should not rely on the value itself, but only on the comparison to /// the other block frequencies. We do this to avoid using of floating points. /// - BlockFrequency getBlockFreq(MachineBasicBlock *MBB) const; + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; }; } diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h index d9673e2197c8..af4db7d6bde6 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h @@ -25,6 +25,7 @@ class raw_ostream; class MachineBasicBlock; class MachineBranchProbabilityInfo : public ImmutablePass { + virtual void anchor(); // Default weight value. Used when we don't have information about the edge. // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of @@ -34,9 +35,6 @@ class MachineBranchProbabilityInfo : public ImmutablePass { // weight to just "inherit" the non-zero weight of an adjacent successor. static const uint32_t DEFAULT_WEIGHT = 16; - // Get sum of the block successors' weights. - uint32_t getSumForBlock(MachineBasicBlock *MBB) const; - public: static char ID; @@ -51,17 +49,27 @@ class MachineBranchProbabilityInfo : public ImmutablePass { // Return edge weight. If we don't have any informations about it - return // DEFAULT_WEIGHT. - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst) const; + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + + // Get sum of the block successors' weights, potentially scaling them to fit + // within 32-bits. If scaling is required, sets Scale based on the necessary + // adjustment. Any edge weights used with the sum should be divided by Scale. + uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const; // A 'Hot' edge is an edge which probability is >= 80%. bool isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const; // Return a hot successor for the block BB or null if there isn't one. + // NB: This routine's complexity is linear on the number of successors. MachineBasicBlock *getHotSucc(MachineBasicBlock *MBB) const; // Return a probability as a fraction between 0 (0% probability) and // 1 (100% probability), however the value is never equal to 0, and can be 1 // only iff SRC block has only one successor. + // NB: This routine's complexity is linear on the number of successors of + // Src. Querying sequentially for each successor's probability is a quadratic + // query pattern. 
BranchProbability getEdgeProbability(MachineBasicBlock *Src, MachineBasicBlock *Dst) const; diff --git a/contrib/llvm/include/llvm/CodeGen/MachineCodeEmitter.h b/contrib/llvm/include/llvm/CodeGen/MachineCodeEmitter.h index 428aada7ba13..86e8f27877e2 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineCodeEmitter.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineCodeEmitter.h @@ -20,6 +20,8 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/DebugLoc.h" +#include + namespace llvm { class MachineBasicBlock; @@ -49,6 +51,7 @@ class MCSymbol; /// occurred, more memory is allocated, and we reemit the code into it. /// class MachineCodeEmitter { + virtual void anchor(); protected: /// BufferBegin/BufferEnd - Pointers to the start and end of the memory /// allocated for this code buffer. diff --git a/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h b/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h index 29f4f443bf7b..d6d65a24defb 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineConstantPool.h @@ -34,6 +34,7 @@ class raw_ostream; /// Abstract base class for all machine specific constantpool value subclasses. /// class MachineConstantPoolValue { + virtual void anchor(); Type *Ty; public: diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h index ab944a2335f7..82a4ac821b69 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h @@ -84,7 +84,8 @@ class MachineDominatorTree : public MachineFunctionPass { // Loop through the basic block until we find A or B. MachineBasicBlock::iterator I = BBA->begin(); - for (; &*I != A && &*I != B; ++I) /*empty*/; + for (; &*I != A && &*I != B; ++I) + /*empty*/ ; //if(!DT.IsPostDominators) { // A dominates B if it is found first in the basic block. diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h index b347ca8e680a..44402a9e68fb 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -465,7 +465,7 @@ class MachineFrameInfo { bool isSpillSlotObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;; + return Objects[ObjectIdx+NumFixedObjects].isSpillSlot; } /// isDeadObjectIndex - Returns true if the specified index corresponds to diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h index 6e08f7b0503d..dda2dc708769 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h @@ -120,10 +120,12 @@ class MachineFunction { /// Alignment - The alignment of the function. unsigned Alignment; - /// CallsSetJmp - True if the function calls setjmp or sigsetjmp. This is used - /// to limit optimizations which cannot reason about the control flow of - /// setjmp. - bool CallsSetJmp; + /// ExposesReturnsTwice - True if the function calls setjmp or related + /// functions with attribute "returns twice", but doesn't have + /// the attribute itself. + /// This is used to limit optimizations which cannot reason + /// about the control flow of such functions. 
+ bool ExposesReturnsTwice; MachineFunction(const MachineFunction &); // DO NOT IMPLEMENT void operator=(const MachineFunction&); // DO NOT IMPLEMENT @@ -187,20 +189,22 @@ class MachineFunction { /// void setAlignment(unsigned A) { Alignment = A; } - /// EnsureAlignment - Make sure the function is at least 'A' bits aligned. + /// EnsureAlignment - Make sure the function is at least 1 << A bytes aligned. void EnsureAlignment(unsigned A) { if (Alignment < A) Alignment = A; } - /// callsSetJmp - Returns true if the function calls setjmp or sigsetjmp. - bool callsSetJmp() const { - return CallsSetJmp; + /// exposesReturnsTwice - Returns true if the function calls setjmp or + /// any other similar functions with attribute "returns twice" without + /// having the attribute itself. + bool exposesReturnsTwice() const { + return ExposesReturnsTwice; } - /// setCallsSetJmp - Set a flag that indicates if there's a call to setjmp or - /// sigsetjmp. - void setCallsSetJmp(bool B) { - CallsSetJmp = B; + /// setCallsSetJmp - Set a flag that indicates if there's a call to + /// a "returns twice" function. + void setExposesReturnsTwice(bool B) { + ExposesReturnsTwice = B; } /// getInfo - Keep track of various per-function pieces of information for @@ -376,7 +380,8 @@ class MachineFunction { MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo = 0); + const MDNode *TBAAInfo = 0, + const MDNode *Ranges = 0); /// getMachineMemOperand - Allocate a new MachineMemOperand by copying /// an existing one, adjusting by an offset and using the given size. @@ -437,6 +442,7 @@ template <> struct GraphTraits : typedef MachineFunction::iterator nodes_iterator; static nodes_iterator nodes_begin(MachineFunction *F) { return F->begin(); } static nodes_iterator nodes_end (MachineFunction *F) { return F->end(); } + static unsigned size (MachineFunction *F) { return F->size(); } }; template <> struct GraphTraits : public GraphTraits { @@ -452,6 +458,9 @@ template <> struct GraphTraits : static nodes_iterator nodes_end (const MachineFunction *F) { return F->end(); } + static unsigned size (const MachineFunction *F) { + return F->size(); + } }; diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h index 50676ad4ad49..50ea2062f30c 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -26,17 +26,14 @@ class MachineFunction; struct MachineFunctionAnalysis : public FunctionPass { private: const TargetMachine &TM; - CodeGenOpt::Level OptLevel; MachineFunction *MF; unsigned NextFnNum; public: static char ID; - explicit MachineFunctionAnalysis(const TargetMachine &tm, - CodeGenOpt::Level OL = CodeGenOpt::Default); + explicit MachineFunctionAnalysis(const TargetMachine &tm); ~MachineFunctionAnalysis(); MachineFunction &getMF() const { return *MF; } - CodeGenOpt::Level getOptLevel() const { return OptLevel; } virtual const char* getPassName() const { return "Machine Function Analysis"; diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h index cae38f34709d..65093d7e7ad6 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Target/TargetOpcodes.h" +#include 
"llvm/ADT/ArrayRef.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/STLExtras.h" @@ -53,9 +54,11 @@ class MachineInstr : public ilist_node { }; enum MIFlag { - NoFlags = 0, - FrameSetup = 1 << 0 // Instruction is used as a part of + NoFlags = 0, + FrameSetup = 1 << 0, // Instruction is used as a part of // function frame setup code. + InsideBundle = 1 << 1 // Instruction is inside a bundle (not + // the first MI in a bundle) }; private: const MCInstrDesc *MCID; // Instruction descriptor. @@ -71,9 +74,10 @@ class MachineInstr : public ilist_node { // anything other than to convey comment // information to AsmPrinter. + uint16_t NumMemRefs; // information on memory references + mmo_iterator MemRefs; + std::vector Operands; // the operands - mmo_iterator MemRefs; // information on memory references - mmo_iterator MemRefsEnd; MachineBasicBlock *Parent; // Pointer to the owning basic block. DebugLoc debugLoc; // Source line information. @@ -148,6 +152,12 @@ class MachineInstr : public ilist_node { AsmPrinterFlags |= (uint8_t)Flag; } + /// clearAsmPrinterFlag - clear specific AsmPrinter flags + /// + void clearAsmPrinterFlag(CommentFlag Flag) { + AsmPrinterFlags &= ~Flag; + } + /// getFlags - Return the MI flags bitvector. uint8_t getFlags() const { return Flags; @@ -167,12 +177,64 @@ class MachineInstr : public ilist_node { Flags = flags; } - /// clearAsmPrinterFlag - clear specific AsmPrinter flags - /// - void clearAsmPrinterFlag(CommentFlag Flag) { - AsmPrinterFlags &= ~Flag; + /// clearFlag - Clear a MI flag. + void clearFlag(MIFlag Flag) { + Flags &= ~((uint8_t)Flag); } + /// isInsideBundle - Return true if MI is in a bundle (but not the first MI + /// in a bundle). + /// + /// A bundle looks like this before it's finalized: + /// ---------------- + /// | MI | + /// ---------------- + /// | + /// ---------------- + /// | MI * | + /// ---------------- + /// | + /// ---------------- + /// | MI * | + /// ---------------- + /// In this case, the first MI starts a bundle but is not inside a bundle, the + /// next 2 MIs are considered "inside" the bundle. + /// + /// After a bundle is finalized, it looks like this: + /// ---------------- + /// | Bundle | + /// ---------------- + /// | + /// ---------------- + /// | MI * | + /// ---------------- + /// | + /// ---------------- + /// | MI * | + /// ---------------- + /// | + /// ---------------- + /// | MI * | + /// ---------------- + /// The first instruction has the special opcode "BUNDLE". It's not "inside" + /// a bundle, but the next three MIs are. + bool isInsideBundle() const { + return getFlag(InsideBundle); + } + + /// setIsInsideBundle - Set InsideBundle bit. + /// + void setIsInsideBundle(bool Val = true) { + if (Val) + setFlag(InsideBundle); + else + clearFlag(InsideBundle); + } + + /// isBundled - Return true if this instruction part of a bundle. This is true + /// if either itself or its following instruction is marked "InsideBundle". + bool isBundled() const; + /// getDebugLoc - Returns the debug location id of this MachineInstr. 
/// DebugLoc getDebugLoc() const { return debugLoc; } @@ -223,15 +285,285 @@ class MachineInstr : public ilist_node { /// Access to memory operands of the instruction mmo_iterator memoperands_begin() const { return MemRefs; } - mmo_iterator memoperands_end() const { return MemRefsEnd; } - bool memoperands_empty() const { return MemRefsEnd == MemRefs; } + mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; } + bool memoperands_empty() const { return NumMemRefs == 0; } /// hasOneMemOperand - Return true if this instruction has exactly one /// MachineMemOperand. bool hasOneMemOperand() const { - return MemRefsEnd - MemRefs == 1; + return NumMemRefs == 1; } + /// API for querying MachineInstr properties. They are the same as MCInstrDesc + /// queries but they are bundle aware. + + enum QueryType { + IgnoreBundle, // Ignore bundles + AnyInBundle, // Return true if any instruction in bundle has property + AllInBundle // Return true if all instructions in bundle have property + }; + + /// hasProperty - Return true if the instruction (or in the case of a bundle, + /// the instructions inside the bundle) has the specified property. + /// The first argument is the property being queried. + /// The second argument indicates whether the query should look inside + /// instruction bundles. + bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const { + // Inline the fast path. + if (Type == IgnoreBundle || !isBundle()) + return getDesc().getFlags() & (1 << MCFlag); + + // If we have a bundle, take the slow path. + return hasPropertyInBundle(1 << MCFlag, Type); + } + + /// isVariadic - Return true if this instruction can have a variable number of + /// operands. In this case, the variable operands will be after the normal + /// operands but before the implicit definitions and uses (if any are + /// present). + bool isVariadic(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::Variadic, Type); + } + + /// hasOptionalDef - Set if this instruction has an optional definition, e.g. + /// ARM instructions which can set condition code if 's' bit is set. + bool hasOptionalDef(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::HasOptionalDef, Type); + } + + /// isPseudo - Return true if this is a pseudo instruction that doesn't + /// correspond to a real machine instruction. + /// + bool isPseudo(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::Pseudo, Type); + } + + bool isReturn(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::Return, Type); + } + + bool isCall(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::Call, Type); + } + + /// isBarrier - Returns true if the specified instruction stops control flow + /// from executing the instruction immediately following it. Examples include + /// unconditional branches and return instructions. + bool isBarrier(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::Barrier, Type); + } + + /// isTerminator - Returns true if this instruction part of the terminator for + /// a basic block. Typically this is things like return and branch + /// instructions. + /// + /// Various passes use this to insert code into the bottom of a basic block, + /// but before control flow occurs. + bool isTerminator(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::Terminator, Type); + } + + /// isBranch - Returns true if this is a conditional, unconditional, or + /// indirect branch. 
Predicates below can be used to discriminate between + /// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to + /// get more information. + bool isBranch(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::Branch, Type); + } + + /// isIndirectBranch - Return true if this is an indirect branch, such as a + /// branch through a register. + bool isIndirectBranch(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::IndirectBranch, Type); + } + + /// isConditionalBranch - Return true if this is a branch which may fall + /// through to the next instruction or may transfer control flow to some other + /// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more + /// information about this branch. + bool isConditionalBranch(QueryType Type = AnyInBundle) const { + return isBranch(Type) & !isBarrier(Type) & !isIndirectBranch(Type); + } + + /// isUnconditionalBranch - Return true if this is a branch which always + /// transfers control flow to some other block. The + /// TargetInstrInfo::AnalyzeBranch method can be used to get more information + /// about this branch. + bool isUnconditionalBranch(QueryType Type = AnyInBundle) const { + return isBranch(Type) & isBarrier(Type) & !isIndirectBranch(Type); + } + + // isPredicable - Return true if this instruction has a predicate operand that + // controls execution. It may be set to 'always', or may be set to other + /// values. There are various methods in TargetInstrInfo that can be used to + /// control and modify the predicate in this instruction. + bool isPredicable(QueryType Type = AllInBundle) const { + // If it's a bundle than all bundled instructions must be predicable for this + // to return true. + return hasProperty(MCID::Predicable, Type); + } + + /// isCompare - Return true if this instruction is a comparison. + bool isCompare(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::Compare, Type); + } + + /// isMoveImmediate - Return true if this instruction is a move immediate + /// (including conditional moves) instruction. + bool isMoveImmediate(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::MoveImm, Type); + } + + /// isBitcast - Return true if this instruction is a bitcast instruction. + /// + bool isBitcast(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::Bitcast, Type); + } + + /// isNotDuplicable - Return true if this instruction cannot be safely + /// duplicated. For example, if the instruction has a unique labels attached + /// to it, duplicating it would cause multiple definition errors. + bool isNotDuplicable(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::NotDuplicable, Type); + } + + /// hasDelaySlot - Returns true if the specified instruction has a delay slot + /// which must be filled by the code generator. + bool hasDelaySlot(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::DelaySlot, Type); + } + + /// canFoldAsLoad - Return true for instructions that can be folded as + /// memory operands in other instructions. The most common use for this + /// is instructions that are simple loads from memory that don't modify + /// the loaded value in any way, but it can also be used for instructions + /// that can be expressed as constant-pool loads, such as V_SETALLONES + /// on x86, to allow them to be folded when it is beneficial. + /// This should only be set on instructions that return a value in their + /// only virtual register definition. 
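As a hedged illustration of the bundle-aware query interface imported above: callers may pass an explicit QueryType to widen or narrow a predicate to the whole bundle. The helper below is a sketch only; its name and the enclosing pass are assumptions, not part of the imported headers.

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Sketch: decide whether MI (or any instruction bundled with it) may
// transfer control flow or write memory.
static bool mayEndScheduleRegion(const MachineInstr *MI) {
  // AnyInBundle: true if any instruction in the bundle has the property.
  if (MI->isCall(MachineInstr::AnyInBundle) ||
      MI->isBarrier(MachineInstr::AnyInBundle))
    return true;
  // IgnoreBundle: consult only this instruction's own MCInstrDesc.
  return MI->mayStore(MachineInstr::IgnoreBundle);
}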
+ bool canFoldAsLoad(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::FoldableAsLoad, Type); + } + + //===--------------------------------------------------------------------===// + // Side Effect Analysis + //===--------------------------------------------------------------------===// + + /// mayLoad - Return true if this instruction could possibly read memory. + /// Instructions with this flag set are not necessarily simple load + /// instructions, they may load a value and modify it, for example. + bool mayLoad(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::MayLoad, Type); + } + + + /// mayStore - Return true if this instruction could possibly modify memory. + /// Instructions with this flag set are not necessarily simple store + /// instructions, they may store a modified value based on their operands, or + /// may not actually modify anything, for example. + bool mayStore(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::MayStore, Type); + } + + //===--------------------------------------------------------------------===// + // Flags that indicate whether an instruction can be modified by a method. + //===--------------------------------------------------------------------===// + + /// isCommutable - Return true if this may be a 2- or 3-address + /// instruction (of the form "X = op Y, Z, ..."), which produces the same + /// result if Y and Z are exchanged. If this flag is set, then the + /// TargetInstrInfo::commuteInstruction method may be used to hack on the + /// instruction. + /// + /// Note that this flag may be set on instructions that are only commutable + /// sometimes. In these cases, the call to commuteInstruction will fail. + /// Also note that some instructions require non-trivial modification to + /// commute them. + bool isCommutable(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::Commutable, Type); + } + + /// isConvertibleTo3Addr - Return true if this is a 2-address instruction + /// which can be changed into a 3-address instruction if needed. Doing this + /// transformation can be profitable in the register allocator, because it + /// means that the instruction can use a 2-address form if possible, but + /// degrade into a less efficient form if the source and dest register cannot + /// be assigned to the same register. For example, this allows the x86 + /// backend to turn a "shl reg, 3" instruction into an LEA instruction, which + /// is the same speed as the shift but has bigger code size. + /// + /// If this returns true, then the target must implement the + /// TargetInstrInfo::convertToThreeAddress method for this instruction, which + /// is allowed to fail if the transformation isn't valid for this specific + /// instruction (e.g. shl reg, 4 on x86). + /// + bool isConvertibleTo3Addr(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::ConvertibleTo3Addr, Type); + } + + /// usesCustomInsertionHook - Return true if this instruction requires + /// custom insertion support when the DAG scheduler is inserting it into a + /// machine basic block. If this is true for the instruction, it basically + /// means that it is a pseudo instruction used at SelectionDAG time that is + /// expanded out into magic code by the target when MachineInstrs are formed. + /// + /// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method + /// is used to insert this into the MachineBasicBlock. 
+ bool usesCustomInsertionHook(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::UsesCustomInserter, Type); + } + + /// hasPostISelHook - Return true if this instruction requires *adjustment* + /// after instruction selection by calling a target hook. For example, this + /// can be used to fill in ARM 's' optional operand depending on whether + /// the conditional flag register is used. + bool hasPostISelHook(QueryType Type = IgnoreBundle) const { + return hasProperty(MCID::HasPostISelHook, Type); + } + + /// isRematerializable - Returns true if this instruction is a candidate for + /// remat. This flag is deprecated, please don't use it anymore. If this + /// flag is set, the isReallyTriviallyReMaterializable() method is called to + /// verify the instruction is really rematable. + bool isRematerializable(QueryType Type = AllInBundle) const { + // It's only possible to re-mat a bundle if all bundled instructions are + // re-materializable. + return hasProperty(MCID::Rematerializable, Type); + } + + /// isAsCheapAsAMove - Returns true if this instruction has the same cost (or + /// less) than a move instruction. This is useful during certain types of + /// optimizations (e.g., remat during two-address conversion or machine licm) + /// where we would like to remat or hoist the instruction, but not if it costs + /// more than moving the instruction into the appropriate register. Note, we + /// are not marking copies from and to the same register class with this flag. + bool isAsCheapAsAMove(QueryType Type = AllInBundle) const { + // Only returns true for a bundle if all bundled instructions are cheap. + // FIXME: This probably requires a target hook. + return hasProperty(MCID::CheapAsAMove, Type); + } + + /// hasExtraSrcRegAllocReq - Returns true if this instruction source operands + /// have special register allocation requirements that are not captured by the + /// operand register classes. e.g. ARM::STRD's two source registers must be an + /// even / odd pair, ARM::STM registers have to be in ascending order. + /// Post-register allocation passes should not attempt to change allocations + /// for sources of instructions with this flag. + bool hasExtraSrcRegAllocReq(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::ExtraSrcRegAllocReq, Type); + } + + /// hasExtraDefRegAllocReq - Returns true if this instruction def operands + /// have special register allocation requirements that are not captured by the + /// operand register classes. e.g. ARM::LDRD's two def registers must be an + /// even / odd pair, ARM::LDM registers have to be in ascending order. + /// Post-register allocation passes should not attempt to change allocations + /// for definitions of instructions with this flag. + bool hasExtraDefRegAllocReq(QueryType Type = AnyInBundle) const { + return hasProperty(MCID::ExtraDefRegAllocReq, Type); + } + + enum MICheckType { CheckDefs, // Check all operands for equality CheckKillDead, // Check all operands including kill / dead markers @@ -281,6 +613,9 @@ class MachineInstr : public ilist_node { bool isRegSequence() const { return getOpcode() == TargetOpcode::REG_SEQUENCE; } + bool isBundle() const { + return getOpcode() == TargetOpcode::BUNDLE; + } bool isCopy() const { return getOpcode() == TargetOpcode::COPY; } @@ -300,6 +635,9 @@ class MachineInstr : public ilist_node { getOperand(0).getSubReg() == getOperand(1).getSubReg(); } + /// getBundleSize - Return the number of instructions inside the MI bundle. 
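A hedged sketch of what walking a bundle with the raw instruction iterators looks like, and essentially the count that getBundleSize() below is documented to return; the helper name is hypothetical.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: count the instructions bundled under MI, not
// counting the BUNDLE header itself.
static unsigned countBundledInstrs(const llvm::MachineInstr *MI) {
  llvm::MachineBasicBlock::const_instr_iterator I = MI;
  unsigned N = 0;
  // The header is not "inside" the bundle; the instructions that follow it
  // are, until the first one without the InsideBundle flag.
  for (++I; I != MI->getParent()->instr_end() && I->isInsideBundle(); ++I)
    ++N;
  return N;
}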
+ unsigned getBundleSize() const; + /// readsRegister - Return true if the MachineInstr reads the specified /// register. If TargetRegisterInfo is passed, then it also checks if there /// is a read of a super-register. @@ -372,6 +710,7 @@ class MachineInstr : public ilist_node { /// that are not dead are skipped. If Overlap is true, then it also looks for /// defs that merely overlap the specified register. If TargetRegisterInfo is /// non-null, then it also checks if there is a def of a super-register. + /// This may also return a register mask operand when Overlap is true. int findRegisterDefOperandIdx(unsigned Reg, bool isDead = false, bool Overlap = false, const TargetRegisterInfo *TRI = NULL) const; @@ -416,7 +755,7 @@ class MachineInstr : public ilist_node { /// isRegTiedToUseOperand - Given the index of a register def operand, /// check if the register def is tied to a source operand, due to either /// two-address elimination or inline assembly constraints. Returns the - /// first tied use operand index by reference is UseOpIdx is not null. + /// first tied use operand index by reference if UseOpIdx is not null. bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx = 0) const; /// isRegTiedToDefOperand - Return true if the use operand of the specified @@ -448,6 +787,10 @@ class MachineInstr : public ilist_node { const TargetRegisterInfo *RegInfo, bool AddIfNotFound = false); + /// clearRegisterKills - Clear all kill flags affecting Reg. If RegInfo is + /// provided, this includes super-register kills. + void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo); + /// addRegisterDead - We have determined MI defined a register without a use. /// Look for the operand that defines it and mark it as IsDead. If /// AddIfNotFound is true, add a implicit operand if it's not found. Returns @@ -462,7 +805,10 @@ class MachineInstr : public ilist_node { /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as /// dead except those in the UsedRegs list. - void setPhysRegsDeadExcept(const SmallVectorImpl &UsedRegs, + /// + /// On instructions with register mask operands, also add implicit-def + /// operands for all registers in UsedRegs. + void setPhysRegsDeadExcept(ArrayRef UsedRegs, const TargetRegisterInfo &TRI); /// isSafeToMove - Return true if it is safe to move this instruction. If @@ -550,7 +896,7 @@ class MachineInstr : public ilist_node { /// list. This does not transfer ownership. void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) { MemRefs = NewMemRefs; - MemRefsEnd = NewMemRefsEnd; + NumMemRefs = NewMemRefsEnd - NewMemRefs; } private: @@ -572,6 +918,10 @@ class MachineInstr : public ilist_node { /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. void AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo); + + /// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a + /// bundle. 
+ bool hasPropertyInBundle(unsigned Mask, QueryType Type) const; }; /// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index b98902724690..99849a64c56a 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -34,6 +34,7 @@ namespace RegState { Undef = 0x20, EarlyClobber = 0x40, Debug = 0x80, + DefineNoRead = Define | Undef, ImplicitDefine = Implicit | Define, ImplicitKill = Implicit | Kill }; @@ -124,6 +125,11 @@ class MachineInstrBuilder { return *this; } + const MachineInstrBuilder &addRegMask(const uint32_t *Mask) const { + MI->addOperand(MachineOperand::CreateRegMask(Mask)); + return *this; + } + const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const { MI->addMemOperand(*MI->getParent()->getParent(), MMO); return *this; @@ -209,6 +215,30 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define); } +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::instr_iterator I, + DebugLoc DL, + const MCInstrDesc &MCID, + unsigned DestReg) { + MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL); + BB.insert(I, MI); + return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define); +} + +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineInstr *I, + DebugLoc DL, + const MCInstrDesc &MCID, + unsigned DestReg) { + if (I->isInsideBundle()) { + MachineBasicBlock::instr_iterator MII = I; + return BuildMI(BB, MII, DL, MCID, DestReg); + } + + MachineBasicBlock::iterator MII = I; + return BuildMI(BB, MII, DL, MCID, DestReg); +} + /// BuildMI - This version of the builder inserts the newly-built /// instruction before the given position in the given MachineBasicBlock, and /// does NOT take a destination register. @@ -222,6 +252,28 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MI); } +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineBasicBlock::instr_iterator I, + DebugLoc DL, + const MCInstrDesc &MCID) { + MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL); + BB.insert(I, MI); + return MachineInstrBuilder(MI); +} + +inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, + MachineInstr *I, + DebugLoc DL, + const MCInstrDesc &MCID) { + if (I->isInsideBundle()) { + MachineBasicBlock::instr_iterator MII = I; + return BuildMI(BB, MII, DL, MCID); + } + + MachineBasicBlock::iterator MII = I; + return BuildMI(BB, MII, DL, MCID); +} + /// BuildMI - This version of the builder inserts the newly-built /// instruction at the end of the given MachineBasicBlock, and does NOT take a /// destination register. diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h new file mode 100644 index 000000000000..0fb496982276 --- /dev/null +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h @@ -0,0 +1,203 @@ +//===-- CodeGen/MachineInstBundle.h - MI bundle utilities -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provide utility functions to manipulate machine instruction +// bundles. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEINSTRBUNDLE_H +#define LLVM_CODEGEN_MACHINEINSTRBUNDLE_H + +#include "llvm/CodeGen/MachineBasicBlock.h" + +namespace llvm { + +/// finalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (exclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. +void finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI); + +/// finalizeBundle - Same functionality as the previous finalizeBundle except +/// the last instruction in the bundle is not provided as an input. This is +/// used in cases where bundles are pre-determined by marking instructions +/// with 'InsideBundle' marker. It returns the MBB instruction iterator that +/// points to the end of the bundle. +MachineBasicBlock::instr_iterator finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI); + +/// finalizeBundles - Finalize instruction bundles in the specified +/// MachineFunction. Return true if any bundles are finalized. +bool finalizeBundles(MachineFunction &MF); + +/// getBundleStart - Returns the first instruction in the bundle containing MI. +/// +static inline MachineInstr *getBundleStart(MachineInstr *MI) { + MachineBasicBlock::instr_iterator I = MI; + while (I->isInsideBundle()) + --I; + return I; +} + +static inline const MachineInstr *getBundleStart(const MachineInstr *MI) { + MachineBasicBlock::const_instr_iterator I = MI; + while (I->isInsideBundle()) + --I; + return I; +} + +//===----------------------------------------------------------------------===// +// MachineOperand iterator +// + +/// MachineOperandIteratorBase - Iterator that can visit all operands on a +/// MachineInstr, or all operands on a bundle of MachineInstrs. This class is +/// not intended to be used directly, use one of the sub-classes instead. +/// +/// Intended use: +/// +/// for (MIBundleOperands MIO(MI); MIO.isValid(); ++MIO) { +/// if (!MIO->isReg()) +/// continue; +/// ... +/// } +/// +class MachineOperandIteratorBase { + MachineBasicBlock::instr_iterator InstrI, InstrE; + MachineInstr::mop_iterator OpI, OpE; + + // If the operands on InstrI are exhausted, advance InstrI to the next + // bundled instruction with operands. + void advance() { + while (OpI == OpE) { + // Don't advance off the basic block, or into a new bundle. + if (++InstrI == InstrE || !InstrI->isInsideBundle()) + break; + OpI = InstrI->operands_begin(); + OpE = InstrI->operands_end(); + } + } + +protected: + /// MachineOperandIteratorBase - Create an iterator that visits all operands + /// on MI, or all operands on every instruction in the bundle containing MI. + /// + /// @param MI The instruction to examine. + /// @param WholeBundle When true, visit all operands on the entire bundle. 
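A hedged usage sketch for the bundle finalization entry points and the bundle operand iterator declared in this new header; the enclosing function and what it does with the operands are illustrative only.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
using namespace llvm;

// Sketch: finalize bundles that were pre-marked with the InsideBundle flag,
// then visit every operand of each finalized bundle.
static void scanBundleDefs(MachineFunction &MF) {
  finalizeBundles(MF); // inserts a BUNDLE header for each marked run

  for (MachineFunction::iterator MBB = MF.begin(), MBBE = MF.end();
       MBB != MBBE; ++MBB)
    for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
         E = MBB->instr_end(); I != E; ++I) {
      if (!I->isBundle())
        continue;
      // MIBundleOperands visits the operands of the BUNDLE header and of
      // every bundled instruction that follows it.
      for (MIBundleOperands Ops(&*I); Ops.isValid(); ++Ops)
        if (Ops->isReg() && Ops->isDef())
          (void)Ops->getReg(); // e.g. record externally visible defs
    }
}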
+ /// + explicit MachineOperandIteratorBase(MachineInstr *MI, bool WholeBundle) { + if (WholeBundle) { + InstrI = getBundleStart(MI); + InstrE = MI->getParent()->instr_end(); + } else { + InstrI = InstrE = MI; + ++InstrE; + } + OpI = InstrI->operands_begin(); + OpE = InstrI->operands_end(); + if (WholeBundle) + advance(); + } + + MachineOperand &deref() const { return *OpI; } + +public: + /// isValid - Returns true until all the operands have been visited. + bool isValid() const { return OpI != OpE; } + + /// Preincrement. Move to the next operand. + void operator++() { + assert(isValid() && "Cannot advance MIOperands beyond the last operand"); + ++OpI; + advance(); + } + + /// getOperandNo - Returns the number of the current operand relative to its + /// instruction. + /// + unsigned getOperandNo() const { + return OpI - InstrI->operands_begin(); + } + + /// RegInfo - Information about a virtual register used by a set of operands. + /// + struct RegInfo { + /// Reads - One of the operands read the virtual register. This does not + /// include or use operands, see MO::readsReg(). + bool Reads; + + /// Writes - One of the operands writes the virtual register. + bool Writes; + + /// Tied - Uses and defs must use the same register. This can be because of + /// a two-address constraint, or there may be a partial redefinition of a + /// sub-register. + bool Tied; + }; + + /// analyzeVirtReg - Analyze how the current instruction or bundle uses a + /// virtual register. This function should not be called after operator++(), + /// it expects a fresh iterator. + /// + /// @param Reg The virtual register to analyze. + /// @param Ops When set, this vector will receive an (MI, OpNum) entry for + /// each operand referring to Reg. + /// @returns A filled-in RegInfo struct. + RegInfo analyzeVirtReg(unsigned Reg, + SmallVectorImpl > *Ops = 0); +}; + +/// MIOperands - Iterate over operands of a single instruction. +/// +class MIOperands : public MachineOperandIteratorBase { +public: + MIOperands(MachineInstr *MI) : MachineOperandIteratorBase(MI, false) {} + MachineOperand &operator* () const { return deref(); } + MachineOperand *operator->() const { return &deref(); } +}; + +/// ConstMIOperands - Iterate over operands of a single const instruction. +/// +class ConstMIOperands : public MachineOperandIteratorBase { +public: + ConstMIOperands(const MachineInstr *MI) + : MachineOperandIteratorBase(const_cast(MI), false) {} + const MachineOperand &operator* () const { return deref(); } + const MachineOperand *operator->() const { return &deref(); } +}; + +/// MIBundleOperands - Iterate over all operands in a bundle of machine +/// instructions. +/// +class MIBundleOperands : public MachineOperandIteratorBase { +public: + MIBundleOperands(MachineInstr *MI) : MachineOperandIteratorBase(MI, true) {} + MachineOperand &operator* () const { return deref(); } + MachineOperand *operator->() const { return &deref(); } +}; + +/// ConstMIBundleOperands - Iterate over all operands in a const bundle of +/// machine instructions. 
+/// +class ConstMIBundleOperands : public MachineOperandIteratorBase { +public: + ConstMIBundleOperands(const MachineInstr *MI) + : MachineOperandIteratorBase(const_cast(MI), true) {} + const MachineOperand &operator* () const { return deref(); } + const MachineOperand *operator->() const { return &deref(); } +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index 62643497655a..6bd6682dd39c 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -47,7 +47,12 @@ class MachineJumpTableInfo { /// EK_BlockAddress - Each entry is a plain address of block, e.g.: /// .word LBB123 EK_BlockAddress, - + + /// EK_GPRel64BlockAddress - Each entry is an address of block, encoded + /// with a relocation as gp-relative, e.g.: + /// .gpdword LBB123 + EK_GPRel64BlockAddress, + /// EK_GPRel32BlockAddress - Each entry is an address of block, encoded /// with a relocation as gp-relative, e.g.: /// .gprel32 LBB123 diff --git a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h index 768ce47f8b39..1ac9080b75d5 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -22,6 +22,7 @@ namespace llvm { class Value; class FoldingSetNodeID; +class MDNode; class raw_ostream; /// MachinePointerInfo - This class contains a discriminated union of @@ -83,6 +84,7 @@ class MachineMemOperand { uint64_t Size; unsigned Flags; const MDNode *TBAAInfo; + const MDNode *Ranges; public: /// Flags values. These may be or'd together. @@ -95,14 +97,17 @@ class MachineMemOperand { MOVolatile = 4, /// The memory access is non-temporal. MONonTemporal = 8, + /// The memory access is invariant. + MOInvariant = 16, // This is the number of bits we need to represent flags. - MOMaxBits = 4 + MOMaxBits = 5 }; /// MachineMemOperand - Construct an MachineMemOperand object with the /// specified PtrInfo, flags, size, and base alignment. MachineMemOperand(MachinePointerInfo PtrInfo, unsigned flags, uint64_t s, - unsigned base_alignment, const MDNode *TBAAInfo = 0); + unsigned base_alignment, const MDNode *TBAAInfo = 0, + const MDNode *Ranges = 0); const MachinePointerInfo &getPointerInfo() const { return PtrInfo; } @@ -137,10 +142,14 @@ class MachineMemOperand { /// getTBAAInfo - Return the TBAA tag for the memory reference. const MDNode *getTBAAInfo() const { return TBAAInfo; } + /// getRanges - Return the range tag for the memory reference. + const MDNode *getRanges() const { return Ranges; } + bool isLoad() const { return Flags & MOLoad; } bool isStore() const { return Flags & MOStore; } bool isVolatile() const { return Flags & MOVolatile; } bool isNonTemporal() const { return Flags & MONonTemporal; } + bool isInvariant() const { return Flags & MOInvariant; } /// refineAlignment - Update this MachineMemOperand to reflect the alignment /// of MMO, if it has a greater alignment. This must only be used when the diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h index 2bf7f1788f8a..6b88d4a9499b 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -161,10 +161,10 @@ class MachineModuleInfo : public ImmutablePass { /// in this module. 
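Hedged illustration of the new memory-operand flags in context: a hoisting heuristic might now require every attached memory reference to be an invariant, non-volatile load. The predicate below is a sketch, not an API from this import.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"

// Hypothetical predicate: true only if every memory reference attached to
// MI is a non-volatile, invariant load.
static bool hasOnlyInvariantLoads(const llvm::MachineInstr *MI) {
  if (MI->memoperands_empty())
    return false; // no memory info attached: stay conservative
  for (llvm::MachineInstr::mmo_iterator I = MI->memoperands_begin(),
       E = MI->memoperands_end(); I != E; ++I)
    if (!(*I)->isLoad() || (*I)->isVolatile() || !(*I)->isInvariant())
      return false;
  return true;
}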
bool DbgInfoAvailable; - /// CallsExternalVAFunctionWithFloatingPointArguments - True if this module - /// calls VarArg function with floating point arguments. This is used to emit - /// an undefined reference to fltused on Windows targets. - bool CallsExternalVAFunctionWithFloatingPointArguments; + /// UsesVAFloatArgument - True if this module calls VarArg function with + /// floating-point arguments. This is used to emit an undefined reference + /// to _fltused on Windows targets. + bool UsesVAFloatArgument; public: static char ID; // Pass identification, replacement for typeid @@ -223,12 +223,12 @@ class MachineModuleInfo : public ImmutablePass { bool callsUnwindInit() const { return CallsUnwindInit; } void setCallsUnwindInit(bool b) { CallsUnwindInit = b; } - bool callsExternalVAFunctionWithFloatingPointArguments() const { - return CallsExternalVAFunctionWithFloatingPointArguments; + bool usesVAFloatArgument() const { + return UsesVAFloatArgument; } - void setCallsExternalVAFunctionWithFloatingPointArguments(bool b) { - CallsExternalVAFunctionWithFloatingPointArguments = b; + void setUsesVAFloatArgument(bool b) { + UsesVAFloatArgument = b; } /// getFrameMoves - Returns a reference to a list of moves done in the current diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h index 5440a636a4a5..d244dd92103d 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h @@ -48,6 +48,7 @@ class MachineOperand { MO_ExternalSymbol, ///< Name of external global symbol MO_GlobalAddress, ///< Address of a global value MO_BlockAddress, ///< Address of a basic block + MO_RegisterMask, ///< Mask of preserved registers. MO_Metadata, ///< Metadata reference (for debug info) MO_MCSymbol ///< MCSymbol reference (for debug/eh info) }; @@ -102,6 +103,17 @@ class MachineOperand { /// bool IsUndef : 1; + /// IsInternalRead - True if this operand reads a value that was defined + /// inside the same instruction or bundle. This flag can be set on both use + /// and def operands. On a sub-register def operand, it refers to the part + /// of the register that isn't written. On a full-register def operand, it + /// is a noop. + /// + /// When this flag is set, the instruction bundle must contain at least one + /// other def of the register. If multiple instructions in the bundle define + /// the register, the meaning is target-defined. + bool IsInternalRead : 1; + /// IsEarlyClobber - True if this MO_Register 'def' operand is written to /// by the MachineInstr before all input registers are read. This is used to /// model the GCC inline asm '&' constraint modifier. @@ -130,6 +142,7 @@ class MachineOperand { const ConstantFP *CFP; // For MO_FPImmediate. const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit. int64_t ImmVal; // For MO_Immediate. + const uint32_t *RegMask; // For MO_RegisterMask. const MDNode *MD; // For MO_Metadata. MCSymbol *Sym; // For MO_MCSymbol @@ -209,10 +222,13 @@ class MachineOperand { bool isSymbol() const { return OpKind == MO_ExternalSymbol; } /// isBlockAddress - Tests if this is a MO_BlockAddress operand. bool isBlockAddress() const { return OpKind == MO_BlockAddress; } + /// isRegMask - Tests if this is a MO_RegisterMask operand. + bool isRegMask() const { return OpKind == MO_RegisterMask; } /// isMetadata - Tests if this is a MO_Metadata operand. 
bool isMetadata() const { return OpKind == MO_Metadata; } bool isMCSymbol() const { return OpKind == MO_MCSymbol; } + //===--------------------------------------------------------------------===// // Accessors for Register Operands //===--------------------------------------------------------------------===// @@ -258,6 +274,11 @@ class MachineOperand { return IsUndef; } + bool isInternalRead() const { + assert(isReg() && "Wrong MachineOperand accessor"); + return IsInternalRead; + } + bool isEarlyClobber() const { assert(isReg() && "Wrong MachineOperand accessor"); return IsEarlyClobber; @@ -272,9 +293,12 @@ class MachineOperand { /// register. A use operand with the flag set doesn't read its /// register. A sub-register def implicitly reads the other parts of the /// register being redefined unless the flag is set. + /// + /// This refers to reading the register value from before the current + /// instruction or bundle. Internal bundle reads are not included. bool readsReg() const { assert(isReg() && "Wrong MachineOperand accessor"); - return !isUndef() && (isUse() || getSubReg()); + return !isUndef() && !isInternalRead() && (isUse() || getSubReg()); } /// getNextOperandForReg - Return the next MachineOperand in the function that @@ -343,6 +367,11 @@ class MachineOperand { IsUndef = Val; } + void setIsInternalRead(bool Val = true) { + assert(isReg() && "Wrong MachineOperand accessor"); + IsInternalRead = Val; + } + void setIsEarlyClobber(bool Val = true) { assert(isReg() && IsDef && "Wrong MachineOperand accessor"); IsEarlyClobber = Val; @@ -412,6 +441,28 @@ class MachineOperand { return Contents.OffsetedInfo.Val.SymbolName; } + /// clobbersPhysReg - Returns true if this RegMask clobbers PhysReg. + /// It is sometimes necessary to detach the register mask pointer from its + /// machine operand. This static method can be used for such detached bit + /// mask pointers. + static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) { + // See TargetRegisterInfo.h. + assert(PhysReg < (1u << 30) && "Not a physical register"); + return !(RegMask[PhysReg / 32] & (1u << PhysReg % 32)); + } + + /// clobbersPhysReg - Returns true if this RegMask operand clobbers PhysReg. + bool clobbersPhysReg(unsigned PhysReg) const { + return clobbersPhysReg(getRegMask(), PhysReg); + } + + /// getRegMask - Returns a bit mask of registers preserved by this RegMask + /// operand. + const uint32_t *getRegMask() const { + assert(isRegMask() && "Wrong MachineOperand accessor"); + return Contents.RegMask; + } + const MDNode *getMetadata() const { assert(isMetadata() && "Wrong MachineOperand accessor"); return Contents.MD; @@ -498,6 +549,7 @@ class MachineOperand { Op.IsKill = isKill; Op.IsDead = isDead; Op.IsUndef = isUndef; + Op.IsInternalRead = false; Op.IsEarlyClobber = isEarlyClobber; Op.IsDebug = isDebug; Op.SmallContents.RegNo = Reg; @@ -557,6 +609,24 @@ class MachineOperand { Op.setTargetFlags(TargetFlags); return Op; } + /// CreateRegMask - Creates a register mask operand referencing Mask. The + /// operand does not take ownership of the memory referenced by Mask, it must + /// remain valid for the lifetime of the operand. + /// + /// A RegMask operand represents a set of non-clobbered physical registers on + /// an instruction that clobbers many registers, typically a call. The bit + /// mask has a bit set for each physreg that is preserved by this + /// instruction, as described in the documentation for + /// TargetRegisterInfo::getCallPreservedMask(). 
+ /// + /// Any physreg with a 0 bit in the mask is clobbered by the instruction. + /// + static MachineOperand CreateRegMask(const uint32_t *Mask) { + assert(Mask && "Missing register mask"); + MachineOperand Op(MachineOperand::MO_RegisterMask); + Op.Contents.RegMask = Mask; + return Op; + } static MachineOperand CreateMetadata(const MDNode *Meta) { MachineOperand Op(MachineOperand::MO_Metadata); Op.Contents.MD = Meta; diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h index 6ee2e90a9f56..c41e8e26d66a 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h +++ b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h @@ -33,6 +33,7 @@ typedef void *(*MachinePassCtor)(); /// //===----------------------------------------------------------------------===// class MachinePassRegistryListener { + virtual void anchor(); public: MachinePassRegistryListener() {} virtual ~MachinePassRegistryListener() {} diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 3866b2650d0b..3272fbd78ff5 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -15,12 +15,13 @@ #define LLVM_CODEGEN_MACHINEREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" #include namespace llvm { - + /// MachineRegisterInfo - Keep track of information for virtual and physical /// registers, including vreg register classes, use/def chains for registers, /// etc. @@ -31,6 +32,11 @@ class MachineRegisterInfo { /// registers have a single def. bool IsSSA; + /// TracksLiveness - True while register liveness is being tracked accurately. + /// Basic block live-in lists, kill flags, and implicit defs may not be + /// accurate when after this flag is cleared. + bool TracksLiveness; + /// VRegInfo - Information we keep for each virtual register. /// /// Each element in this list contains the register class of the vreg and the @@ -46,18 +52,32 @@ class MachineRegisterInfo { /// the allocator should prefer the physical register allocated to the virtual /// register of the hint. IndexedMap, VirtReg2IndexFunctor> RegAllocHints; - + /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. - MachineOperand **PhysRegUseDefLists; - + MachineOperand **PhysRegUseDefLists; + /// UsedPhysRegs - This is a bit vector that is computed and set by the /// register allocator, and must be kept up to date by passes that run after /// register allocation (though most don't modify this). This is used /// so that the code generator knows which callee save registers to save and /// for other target specific uses. + /// This vector only has bits set for registers explicitly used, not their + /// aliases. BitVector UsedPhysRegs; - + + /// UsedPhysRegMask - Additional used physregs, but including aliases. + BitVector UsedPhysRegMask; + + /// ReservedRegs - This is a bit vector of reserved registers. The target + /// may change its mind about which registers should be reserved. This + /// vector is the frozen set of reserved registers when register allocation + /// started. + BitVector ReservedRegs; + + /// AllocatableRegs - From TRI->getAllocatableSet. 
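A hedged sketch of how register mask operands are meant to be used, following the CreateRegMask documentation above: a target attaches the call-preserved mask to its call instruction, and later passes query it for clobbers. The opcode parameter, helper names, and mask source are placeholders; the mask would normally come from TargetRegisterInfo::getCallPreservedMask().

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Sketch: emit a call carrying a register mask operand. CallOpc stands in
// for a real target opcode.
static MachineInstr *emitCallWithMask(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator InsertPt,
                                      DebugLoc DL, const TargetInstrInfo *TII,
                                      unsigned CallOpc, const uint32_t *Mask) {
  return BuildMI(MBB, InsertPt, DL, TII->get(CallOpc))
      .addRegMask(Mask); // set bit = preserved, clear bit = clobbered
}

// Sketch: does this (call) instruction clobber PhysReg through a mask?
static bool maskClobbers(const MachineInstr *Call, unsigned PhysReg) {
  for (unsigned i = 0, e = Call->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = Call->getOperand(i);
    if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg))
      return true;
  }
  return false;
}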
+ mutable BitVector AllocatableRegs; + /// LiveIns/LiveOuts - Keep track of the physical registers that are /// livein/liveout of the function. Live in values are typically arguments in /// registers, live out values are typically return values in registers. @@ -65,7 +85,7 @@ class MachineRegisterInfo { /// stored in the second element. std::vector > LiveIns; std::vector LiveOuts; - + MachineRegisterInfo(const MachineRegisterInfo&); // DO NOT IMPLEMENT void operator=(const MachineRegisterInfo&); // DO NOT IMPLEMENT public: @@ -88,6 +108,23 @@ class MachineRegisterInfo { // leaveSSA - Indicates that the machine function is no longer in SSA form. void leaveSSA() { IsSSA = false; } + /// tracksLiveness - Returns true when tracking register liveness accurately. + /// + /// While this flag is true, register liveness information in basic block + /// live-in lists and machine instruction operands is accurate. This means it + /// can be used to change the code in ways that affect the values in + /// registers, for example by the register scavenger. + /// + /// When this flag is false, liveness is no longer reliable. + bool tracksLiveness() const { return TracksLiveness; } + + /// invalidateLiveness - Indicates that register liveness is no longer being + /// tracked accurately. + /// + /// This should be called by late passes that invalidate the liveness + /// information. + void invalidateLiveness() { TracksLiveness = false; } + //===--------------------------------------------------------------------===// // Register Info //===--------------------------------------------------------------------===// @@ -141,7 +178,7 @@ class MachineRegisterInfo { return use_iterator(getRegUseDefListHead(RegNo)); } static use_iterator use_end() { return use_iterator(0); } - + /// use_empty - Return true if there are no instructions using the specified /// register. bool use_empty(unsigned RegNo) const { return use_begin(RegNo) == use_end(); } @@ -157,7 +194,7 @@ class MachineRegisterInfo { return use_nodbg_iterator(getRegUseDefListHead(RegNo)); } static use_nodbg_iterator use_nodbg_end() { return use_nodbg_iterator(0); } - + /// use_nodbg_empty - Return true if there are no non-Debug instructions /// using the specified register. bool use_nodbg_empty(unsigned RegNo) const { @@ -171,8 +208,16 @@ class MachineRegisterInfo { /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. + /// + /// Note that it is usually necessary to first constrain ToReg's register + /// class to match the FromReg constraints using: + /// + /// constrainRegClass(ToReg, getRegClass(FromReg)) + /// + /// That function will return NULL if the virtual registers have incompatible + /// constraints. void replaceRegWith(unsigned FromReg, unsigned ToReg); - + /// getRegUseDefListHead - Return the head pointer for the register use/def /// list for the specified virtual or physical register. MachineOperand *&getRegUseDefListHead(unsigned RegNo) { @@ -180,7 +225,7 @@ class MachineRegisterInfo { return VRegInfo[RegNo].second; return PhysRegUseDefLists[RegNo]; } - + MachineOperand *getRegUseDefListHead(unsigned RegNo) const { if (TargetRegisterInfo::isVirtualRegister(RegNo)) return VRegInfo[RegNo].second; @@ -197,15 +242,20 @@ class MachineRegisterInfo { /// optimization passes which extend register lifetimes and need only /// preserve conservative kill flag information. 
void clearKillFlags(unsigned Reg) const; - + #ifndef NDEBUG void dumpUses(unsigned RegNo) const; #endif - + + /// isConstantPhysReg - Returns true if PhysReg is unallocatable and constant + /// throughout the function. It is safe to move instructions that read such + /// a physreg. + bool isConstantPhysReg(unsigned PhysReg, const MachineFunction &MF) const; + //===--------------------------------------------------------------------===// // Virtual Register Info //===--------------------------------------------------------------------===// - + /// getRegClass - Return the register class of the specified virtual register. /// const TargetRegisterClass *getRegClass(unsigned Reg) const { @@ -246,6 +296,9 @@ class MachineRegisterInfo { /// unsigned getNumVirtRegs() const { return VRegInfo.size(); } + /// clearVirtRegs - Remove all virtual registers (after physreg assignment). + void clearVirtRegs(); + /// setRegAllocationHint - Specify a register allocation hint for the /// specified virtual register. void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) { @@ -271,38 +324,87 @@ class MachineRegisterInfo { //===--------------------------------------------------------------------===// // Physical Register Use Info //===--------------------------------------------------------------------===// - + /// isPhysRegUsed - Return true if the specified register is used in this /// function. This only works after register allocation. - bool isPhysRegUsed(unsigned Reg) const { return UsedPhysRegs[Reg]; } - + bool isPhysRegUsed(unsigned Reg) const { + return UsedPhysRegs.test(Reg) || UsedPhysRegMask.test(Reg); + } + + /// isPhysRegOrOverlapUsed - Return true if Reg or any overlapping register + /// is used in this function. + bool isPhysRegOrOverlapUsed(unsigned Reg) const { + if (UsedPhysRegMask.test(Reg)) + return true; + for (const uint16_t *AI = TRI->getOverlaps(Reg); *AI; ++AI) + if (UsedPhysRegs.test(*AI)) + return true; + return false; + } + /// setPhysRegUsed - Mark the specified register used in this function. /// This should only be called during and after register allocation. - void setPhysRegUsed(unsigned Reg) { UsedPhysRegs[Reg] = true; } + void setPhysRegUsed(unsigned Reg) { UsedPhysRegs.set(Reg); } /// addPhysRegsUsed - Mark the specified registers used in this function. /// This should only be called during and after register allocation. void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; } + /// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used. + /// This corresponds to the bit mask attached to register mask operands. + void addPhysRegsUsedFromRegMask(const uint32_t *RegMask) { + UsedPhysRegMask.setBitsNotInMask(RegMask); + } + /// setPhysRegUnused - Mark the specified register unused in this function. /// This should only be called during and after register allocation. - void setPhysRegUnused(unsigned Reg) { UsedPhysRegs[Reg] = false; } + void setPhysRegUnused(unsigned Reg) { + UsedPhysRegs.reset(Reg); + UsedPhysRegMask.reset(Reg); + } + + + //===--------------------------------------------------------------------===// + // Reserved Register Info + //===--------------------------------------------------------------------===// + // + // The set of reserved registers must be invariant during register + // allocation. For example, the target cannot suddenly decide it needs a + // frame pointer when the register allocator has already used the frame + // pointer register for something else. 
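Hedged sketch of the intended protocol around the new liveness-tracking flag: a pass that stops maintaining kill flags and live-in lists should say so, and later consumers should check before trusting them. The pass body is illustrative only.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Illustrative late pass body: liveness data is only trustworthy while
// MachineRegisterInfo is still tracking it.
static void lateRewrite(llvm::MachineFunction &MF) {
  llvm::MachineRegisterInfo &MRI = MF.getRegInfo();

  if (MRI.tracksLiveness()) {
    // Kill flags and basic-block live-in lists can be relied on here,
    // e.g. by a register scavenger.
  }

  // ...rewrites that do not keep liveness information up to date...

  // Announce that kill flags / live-ins may now be stale.
  MRI.invalidateLiveness();
}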
+ // + // These methods can be used by target hooks like hasFP() to avoid changing + // the reserved register set during register allocation. + + /// freezeReservedRegs - Called by the register allocator to freeze the set + /// of reserved registers before allocation begins. + void freezeReservedRegs(const MachineFunction&); + + /// reservedRegsFrozen - Returns true after freezeReservedRegs() was called + /// to ensure the set of reserved registers stays constant. + bool reservedRegsFrozen() const { + return !ReservedRegs.empty(); + } + + /// canReserveReg - Returns true if PhysReg can be used as a reserved + /// register. Any register can be reserved before freezeReservedRegs() is + /// called. + bool canReserveReg(unsigned PhysReg) const { + return !reservedRegsFrozen() || ReservedRegs.test(PhysReg); + } - /// closePhysRegsUsed - Expand UsedPhysRegs to its transitive closure over - /// subregisters. That means that if R is used, so are all subregisters. - void closePhysRegsUsed(const TargetRegisterInfo&); //===--------------------------------------------------------------------===// // LiveIn/LiveOut Management //===--------------------------------------------------------------------===// - + /// addLiveIn/Out - Add the specified register as a live in/out. Note that it /// is an error to add the same register to the same set more than once. void addLiveIn(unsigned Reg, unsigned vreg = 0) { LiveIns.push_back(std::make_pair(Reg, vreg)); } void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); } - + // Iteration support for live in/out sets. These sets are kept in sorted // order by their register number. typedef std::vector >::const_iterator @@ -334,7 +436,7 @@ class MachineRegisterInfo { private: void HandleVRegListReallocation(); - + public: /// defusechain_iterator - This class provides iterator support for machine /// operands in the function that use or define a specific register. If @@ -362,31 +464,31 @@ class MachineRegisterInfo { MachineInstr, ptrdiff_t>::reference reference; typedef std::iterator::pointer pointer; - + defusechain_iterator(const defusechain_iterator &I) : Op(I.Op) {} defusechain_iterator() : Op(0) {} - + bool operator==(const defusechain_iterator &x) const { return Op == x.Op; } bool operator!=(const defusechain_iterator &x) const { return !operator==(x); } - + /// atEnd - return true if this iterator is equal to reg_end() on the value. bool atEnd() const { return Op == 0; } - + // Iterator traversal: forward iteration only defusechain_iterator &operator++() { // Preincrement assert(Op && "Cannot increment end iterator!"); Op = Op->getNextOperandForReg(); - + // If this is an operand we don't care about, skip it. - while (Op && ((!ReturnUses && Op->isUse()) || + while (Op && ((!ReturnUses && Op->isUse()) || (!ReturnDefs && Op->isDef()) || (SkipDebug && Op->isDebug()))) Op = Op->getNextOperandForReg(); - + return *this; } defusechain_iterator operator++(int) { // Postincrement @@ -404,30 +506,38 @@ class MachineRegisterInfo { return MI; } + MachineInstr *skipBundle() { + if (!Op) return 0; + MachineInstr *MI = getBundleStart(Op->getParent()); + do ++*this; + while (Op && getBundleStart(Op->getParent()) == MI); + return MI; + } + MachineOperand &getOperand() const { assert(Op && "Cannot dereference end iterator!"); return *Op; } - + /// getOperandNo - Return the operand # of this MachineOperand in its /// MachineInstr. 
unsigned getOperandNo() const { assert(Op && "Cannot dereference end iterator!"); return Op - &Op->getParent()->getOperand(0); } - + // Retrieve a reference to the current operand. MachineInstr &operator*() const { assert(Op && "Cannot dereference end iterator!"); return *Op->getParent(); } - + MachineInstr *operator->() const { assert(Op && "Cannot dereference end iterator!"); return Op->getParent(); } }; - + }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h new file mode 100644 index 000000000000..e852009f7e8b --- /dev/null +++ b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -0,0 +1,91 @@ +//==- MachineScheduler.h - MachineInstr Scheduling Pass ----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides a MachineSchedRegistry for registering alternative machine +// schedulers. A Target may provide an alternative scheduler implementation by +// implementing the following boilerplate: +// +// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) { +// return new CustomMachineScheduler(C); +// } +// static MachineSchedRegistry +// SchedCustomRegistry("custom", "Run my target's custom scheduler", +// createCustomMachineSched); +// +// Inside PassConfig: +// enablePass(MachineSchedulerID); +// MachineSchedRegistry::setDefault(createCustomMachineSched); +// +//===----------------------------------------------------------------------===// + +#ifndef MACHINESCHEDULER_H +#define MACHINESCHEDULER_H + +#include "llvm/CodeGen/MachinePassRegistry.h" + +namespace llvm { + +class AliasAnalysis; +class LiveIntervals; +class MachineDominatorTree; +class MachineLoopInfo; +class ScheduleDAGInstrs; + +/// MachineSchedContext provides enough context from the MachineScheduler pass +/// for the target to instantiate a scheduler. +struct MachineSchedContext { + MachineFunction *MF; + const MachineLoopInfo *MLI; + const MachineDominatorTree *MDT; + const TargetPassConfig *PassConfig; + AliasAnalysis *AA; + LiveIntervals *LIS; + + MachineSchedContext(): MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {} +}; + +/// MachineSchedRegistry provides a selection of available machine instruction +/// schedulers. +class MachineSchedRegistry : public MachinePassRegistryNode { +public: + typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *); + + // RegisterPassParser requires a (misnamed) FunctionPassCtor type. + typedef ScheduleDAGCtor FunctionPassCtor; + + static MachinePassRegistry Registry; + + MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) + : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { + Registry.Add(this); + } + ~MachineSchedRegistry() { Registry.Remove(this); } + + // Accessors. 
+ // + MachineSchedRegistry *getNext() const { + return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); + } + static MachineSchedRegistry *getList() { + return (MachineSchedRegistry *)Registry.getList(); + } + static ScheduleDAGCtor getDefault() { + return (ScheduleDAGCtor)Registry.getDefault(); + } + static void setDefault(ScheduleDAGCtor C) { + Registry.setDefault((MachinePassCtor)C); + } + static void setListener(MachinePassRegistryListener *L) { + Registry.setListener(L); + } +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/CodeGen/ObjectCodeEmitter.h b/contrib/llvm/include/llvm/CodeGen/ObjectCodeEmitter.h deleted file mode 100644 index d46628caae79..000000000000 --- a/contrib/llvm/include/llvm/CodeGen/ObjectCodeEmitter.h +++ /dev/null @@ -1,171 +0,0 @@ -//===-- llvm/CodeGen/ObjectCodeEmitter.h - Object Code Emitter -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Generalized Object Code Emitter, works with ObjectModule and BinaryObject. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_OBJECTCODEEMITTER_H -#define LLVM_CODEGEN_OBJECTCODEEMITTER_H - -#include "llvm/CodeGen/MachineCodeEmitter.h" - -namespace llvm { - -class BinaryObject; -class MachineBasicBlock; -class MachineCodeEmitter; -class MachineFunction; -class MachineConstantPool; -class MachineJumpTableInfo; -class MachineModuleInfo; - -class ObjectCodeEmitter : public MachineCodeEmitter { -protected: - - /// Binary Object (Section or Segment) we are emitting to. - BinaryObject *BO; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. - std::vector MBBLocations; - - /// LabelLocations - This vector is a mapping from Label ID's to their - /// address. - std::vector LabelLocations; - - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector CPLocations; - - /// CPSections - This is a map of constant pool indices to the Section - /// containing the constant pool entry for that index. - std::vector CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector JTLocations; - -public: - ObjectCodeEmitter(); - ObjectCodeEmitter(BinaryObject *bo); - virtual ~ObjectCodeEmitter(); - - /// setBinaryObject - set the BinaryObject we are writting to - void setBinaryObject(BinaryObject *bo); - - /// emitByte - This callback is invoked when a byte needs to be - /// written to the data stream, without buffer overflow testing. - void emitByte(uint8_t B); - - /// emitWordLE - This callback is invoked when a 32-bit word needs to be - /// written to the data stream in little-endian format. - void emitWordLE(uint32_t W); - - /// emitWordBE - This callback is invoked when a 32-bit word needs to be - /// written to the data stream in big-endian format. - void emitWordBE(uint32_t W); - - /// emitDWordLE - This callback is invoked when a 64-bit word needs to be - /// written to the data stream in little-endian format. 
- void emitDWordLE(uint64_t W); - - /// emitDWordBE - This callback is invoked when a 64-bit word needs to be - /// written to the data stream in big-endian format. - void emitDWordBE(uint64_t W); - - /// emitAlignment - Move the CurBufferPtr pointer up to the specified - /// alignment (saturated to BufferEnd of course). - void emitAlignment(unsigned Alignment = 0, uint8_t fill = 0); - - /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be - /// written to the data stream. - void emitULEB128Bytes(uint64_t Value); - - /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be - /// written to the data stream. - void emitSLEB128Bytes(uint64_t Value); - - /// emitString - This callback is invoked when a String needs to be - /// written to the data stream. - void emitString(const std::string &String); - - /// getCurrentPCValue - This returns the address that the next emitted byte - /// will be output to. - uintptr_t getCurrentPCValue() const; - - /// getCurrentPCOffset - Return the offset from the start of the emitted - /// buffer that we are currently writing to. - uintptr_t getCurrentPCOffset() const; - - /// addRelocation - Whenever a relocatable address is needed, it should be - /// noted with this interface. - void addRelocation(const MachineRelocation& relocation); - - /// earlyResolveAddresses - True if the code emitter can use symbol addresses - /// during code emission time. The JIT is capable of doing this because it - /// creates jump tables or constant pools in memory on the fly while the - /// object code emitters rely on a linker to have real addresses and should - /// use relocations instead. - bool earlyResolveAddresses() const { return false; } - - /// startFunction - This callback is invoked when the specified function is - /// about to be code generated. This initializes the BufferBegin/End/Ptr - /// fields. - virtual void startFunction(MachineFunction &F) = 0; - - /// finishFunction - This callback is invoked when the specified function has - /// finished code generation. If a buffer overflow has occurred, this method - /// returns true (the callee is required to try again), otherwise it returns - /// false. - virtual bool finishFunction(MachineFunction &F) = 0; - - /// StartMachineBasicBlock - This should be called by the target when a new - /// basic block is about to be emitted. This way the MCE knows where the - /// start of the block is, and can implement getMachineBasicBlockAddress. - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB); - - /// getMachineBasicBlockAddress - Return the address of the specified - /// MachineBasicBlock, only usable after the label for the MBB has been - /// emitted. - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const; - - /// emitJumpTables - Emit all the jump tables for a given jump table info - /// record to the appropriate section. - virtual void emitJumpTables(MachineJumpTableInfo *MJTI) = 0; - - /// getJumpTableEntryAddress - Return the address of the jump table with index - /// 'Index' in the function that last called initJumpTableInfo. - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const; - - /// emitConstantPool - For each constant pool entry, figure out which section - /// the constant should live in, allocate space for it, and emit it to the - /// Section data buffer. 
- virtual void emitConstantPool(MachineConstantPool *MCP) = 0; - - /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in - /// the constant pool that was last emitted with the emitConstantPool method. - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const; - - /// getConstantPoolEntrySection - Return the section of the 'Index' entry in - /// the constant pool that was last emitted with the emitConstantPool method. - virtual uintptr_t getConstantPoolEntrySection(unsigned Index) const; - - /// Specifies the MachineModuleInfo object. This is used for exception handling - /// purposes. - virtual void setModuleInfo(MachineModuleInfo* Info) = 0; - // to be implemented or depreciated with MachineModuleInfo - -}; // end class ObjectCodeEmitter - -} // end namespace llvm - -#endif - diff --git a/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h b/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h index 5240729f52d0..a5d8b0dbd6a7 100644 --- a/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h +++ b/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h @@ -350,6 +350,43 @@ namespace PBQP { numNodes = numEdges = 0; } + /// \brief Dump a graph to an output stream. + template + void dump(OStream &os) { + os << getNumNodes() << " " << getNumEdges() << "\n"; + + for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + const Vector& v = getNodeCosts(nodeItr); + os << "\n" << v.getLength() << "\n"; + assert(v.getLength() != 0 && "Empty vector in graph."); + os << v[0]; + for (unsigned i = 1; i < v.getLength(); ++i) { + os << " " << v[i]; + } + os << "\n"; + } + + for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + unsigned n1 = std::distance(nodesBegin(), getEdgeNode1(edgeItr)); + unsigned n2 = std::distance(nodesBegin(), getEdgeNode2(edgeItr)); + assert(n1 != n2 && "PBQP graphs shound not have self-edges."); + const Matrix& m = getEdgeCosts(edgeItr); + os << "\n" << n1 << " " << n2 << "\n" + << m.getRows() << " " << m.getCols() << "\n"; + assert(m.getRows() != 0 && "No rows in matrix."); + assert(m.getCols() != 0 && "No cols in matrix."); + for (unsigned i = 0; i < m.getRows(); ++i) { + os << m[i][0]; + for (unsigned j = 1; j < m.getCols(); ++j) { + os << " " << m[i][j]; + } + os << "\n"; + } + } + } + /// \brief Print a representation of this graph in DOT format. /// @param os Output stream to print on. template diff --git a/contrib/llvm/include/llvm/CodeGen/PBQP/HeuristicBase.h b/contrib/llvm/include/llvm/CodeGen/PBQP/HeuristicBase.h index 791c227f0d07..3fee18cc42d9 100644 --- a/contrib/llvm/include/llvm/CodeGen/PBQP/HeuristicBase.h +++ b/contrib/llvm/include/llvm/CodeGen/PBQP/HeuristicBase.h @@ -157,7 +157,7 @@ namespace PBQP { case 0: s.applyR0(nItr); break; case 1: s.applyR1(nItr); break; case 2: s.applyR2(nItr); break; - default: assert(false && + default: llvm_unreachable( "Optimal reductions of degree > 2 nodes is invalid."); } @@ -186,7 +186,7 @@ namespace PBQP { /// \brief Add a node to the heuristic reduce list. /// @param nItr Node iterator to add to the heuristic reduce list. void addToHeuristicList(Graph::NodeItr nItr) { - assert(false && "Must be implemented in derived class."); + llvm_unreachable("Must be implemented in derived class."); } /// \brief Heuristically reduce one of the nodes in the heuristic @@ -194,25 +194,25 @@ namespace PBQP { /// @return True if a reduction takes place, false if the heuristic reduce /// list is empty. 
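// Illustration only, not part of this import: the text layout produced by the
// Graph::dump() helper added to PBQP/Graph.h above, shown for a hypothetical
// graph with two nodes and one edge (all cost values below are made up).
//
//   2 1        first line: <numNodes> <numEdges>
//
//   3          node 0: cost-vector length ...
//   0 1 2      ... followed by its costs
//
//   3          node 1
//   0 2 1
//
//   0 1        edge: the two node indices ...
//   3 3        ... its cost-matrix dimensions ...
//   0 0 0      ... and the matrix rows
//   0 4 0
//   0 0 4
//
// A debugging call site could look like this, assuming "g" is an already
// populated PBQP::Graph; llvm::errs() is just one possible OStream:
#include "llvm/CodeGen/PBQP/Graph.h"
#include "llvm/Support/raw_ostream.h"

static void dumpPBQPGraph(PBQP::Graph &g) {
  g.dump(llvm::errs());
}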
void heuristicReduce() { - assert(false && "Must be implemented in derived class."); + llvm_unreachable("Must be implemented in derived class."); } /// \brief Prepare a change in the costs on the given edge. /// @param eItr Edge iterator. void preUpdateEdgeCosts(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); + llvm_unreachable("Must be implemented in derived class."); } /// \brief Handle the change in the costs on the given edge. /// @param eItr Edge iterator. void postUpdateEdgeCostts(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); + llvm_unreachable("Must be implemented in derived class."); } /// \brief Handle the addition of a new edge into the PBQP graph. /// @param eItr Edge iterator for the added edge. void handleAddEdge(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); + llvm_unreachable("Must be implemented in derived class."); } /// \brief Handle disconnection of an edge from a node. @@ -223,7 +223,7 @@ namespace PBQP { /// method allows for the effect to be computed only for the remaining /// node in the graph. void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - assert(false && "Must be implemented in derived class."); + llvm_unreachable("Must be implemented in derived class."); } /// \brief Clean up any structures used by HeuristicBase. diff --git a/contrib/llvm/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/contrib/llvm/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h index e96c4cb1e0c1..a859e5899f06 100644 --- a/contrib/llvm/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h +++ b/contrib/llvm/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h @@ -418,6 +418,12 @@ namespace PBQP { unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; nd.numDenied = 0; + const Vector& nCosts = getGraph().getNodeCosts(nItr); + for (unsigned i = 1; i < nCosts.getLength(); ++i) { + if (nCosts[i] == std::numeric_limits::infinity()) + ++nd.numDenied; + } + nd.numSafe = numRegs; nd.unsafeDegrees.resize(numRegs, 0); diff --git a/contrib/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm/include/llvm/CodeGen/Passes.h index 7a03ce905d89..3b3819985880 100644 --- a/contrib/llvm/include/llvm/CodeGen/Passes.h +++ b/contrib/llvm/include/llvm/CodeGen/Passes.h @@ -15,6 +15,7 @@ #ifndef LLVM_CODEGEN_PASSES_H #define LLVM_CODEGEN_PASSES_H +#include "llvm/Pass.h" #include "llvm/Target/TargetMachine.h" #include @@ -26,7 +27,211 @@ namespace llvm { class TargetLowering; class TargetRegisterClass; class raw_ostream; +} +namespace llvm { + +extern char &NoPassID; // Allow targets to choose not to run a pass. + +class PassConfigImpl; + +/// Target-Independent Code Generator Pass Configuration Options. +/// +/// This is an ImmutablePass solely for the purpose of exposing CodeGen options +/// to the internals of other CodeGen passes. +class TargetPassConfig : public ImmutablePass { +public: + /// Pseudo Pass IDs. These are defined within TargetPassConfig because they + /// are unregistered pass IDs. They are only useful for use with + /// TargetPassConfig APIs to identify multiple occurrences of the same pass. + /// + + /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early + /// during codegen, on SSA form. + static char EarlyTailDuplicateID; + + /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine + /// optimization after regalloc. 
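// Not from the patch: a standalone sketch of what the loop added to Briggs.h
// above computes. It counts cost-vector entries with infinite cost, i.e.
// candidate registers denied to this node; index 0 is assumed to be the spill
// option, matching the "getLength() - 1" register count used above.
#include <cassert>
#include <limits>
#include <vector>

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  std::vector<double> costs;
  costs.push_back(0.0);  // spill option
  costs.push_back(inf);  // register 1: denied
  costs.push_back(2.0);  // register 2: usable
  costs.push_back(inf);  // register 3: denied

  unsigned numDenied = 0;
  for (unsigned i = 1; i < costs.size(); ++i)
    if (costs[i] == inf)
      ++numDenied;

  assert(numDenied == 2 && "two registers carry infinite cost");
  return 0;
}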
+ static char PostRAMachineLICMID; + +protected: + TargetMachine *TM; + PassManagerBase &PM; + PassConfigImpl *Impl; // Internal data structures + bool Initialized; // Flagged after all passes are configured. + + // Target Pass Options + // Targets provide a default setting, user flags override. + // + bool DisableVerify; + + /// Default setting for -enable-tail-merge on this target. + bool EnableTailMerge; + +public: + TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); + // Dummy constructor. + TargetPassConfig(); + + virtual ~TargetPassConfig(); + + static char ID; + + /// Get the right type of TargetMachine for this target. + template TMC &getTM() const { + return *static_cast(TM); + } + + const TargetLowering *getTargetLowering() const { + return TM->getTargetLowering(); + } + + // + void setInitialized() { Initialized = true; } + + CodeGenOpt::Level getOptLevel() const { return TM->getOptLevel(); } + + void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } + + bool getEnableTailMerge() const { return EnableTailMerge; } + void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); } + + /// Allow the target to override a specific pass without overriding the pass + /// pipeline. When passes are added to the standard pipeline at the + /// point where StadardID is expected, add TargetID in its place. + void substitutePass(char &StandardID, char &TargetID); + + /// Allow the target to enable a specific standard pass by default. + void enablePass(char &ID) { substitutePass(ID, ID); } + + /// Allow the target to disable a specific standard pass by default. + void disablePass(char &ID) { substitutePass(ID, NoPassID); } + + /// Return the pass ssubtituted for StandardID by the target. + /// If no substitution exists, return StandardID. + AnalysisID getPassSubstitution(AnalysisID StandardID) const; + + /// Return true if the optimized regalloc pipeline is enabled. + bool getOptimizeRegAlloc() const; + + /// Add common target configurable passes that perform LLVM IR to IR + /// transforms following machine independent optimization. + virtual void addIRPasses(); + + /// Add common passes that perform LLVM IR to IR transforms in preparation for + /// instruction selection. + virtual void addISelPrepare(); + + /// addInstSelector - This method should install an instruction selector pass, + /// which converts from LLVM code to machine instructions. + virtual bool addInstSelector() { + return true; + } + + /// Add the complete, standard set of LLVM CodeGen passes. + /// Fully developed targets will not generally override this. + virtual void addMachinePasses(); + +protected: + // Helper to verify the analysis is really immutable. + void setOpt(bool &Opt, bool Val); + + /// Methods with trivial inline returns are convenient points in the common + /// codegen pass pipeline where targets may insert passes. Methods with + /// out-of-line standard implementations are major CodeGen stages called by + /// addMachinePasses. Some targets may override major stages when inserting + /// passes is insufficient, but maintaining overriden stages is more work. + /// + + /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM + /// passes (which are run just before instruction selector). + virtual bool addPreISel() { + return true; + } + + /// addMachineSSAOptimization - Add standard passes that optimize machine + /// instructions in SSA form. 
+ virtual void addMachineSSAOptimization(); + + /// addPreRegAlloc - This method may be implemented by targets that want to + /// run passes immediately before register allocation. This should return + /// true if -print-machineinstrs should print after these passes. + virtual bool addPreRegAlloc() { + return false; + } + + /// createTargetRegisterAllocator - Create the register allocator pass for + /// this target at the current optimization level. + virtual FunctionPass *createTargetRegisterAllocator(bool Optimized); + + /// addFastRegAlloc - Add the minimum set of target-independent passes that + /// are required for fast register allocation. + virtual void addFastRegAlloc(FunctionPass *RegAllocPass); + + /// addOptimizedRegAlloc - Add passes related to register allocation. + /// LLVMTargetMachine provides standard regalloc passes for most targets. + virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); + + /// addFinalizeRegAlloc - This method may be implemented by targets that want + /// to run passes within the regalloc pipeline, immediately after the register + /// allocation pass itself. These passes run as soon as virtual regisiters + /// have been rewritten to physical registers but before and other postRA + /// optimization happens. Targets that have marked instructions for bundling + /// must have finalized those bundles by the time these passes have run, + /// because subsequent passes are not guaranteed to be bundle-aware. + virtual bool addFinalizeRegAlloc() { + return false; + } + + /// addPostRegAlloc - This method may be implemented by targets that want to + /// run passes after register allocation pass pipeline but before + /// prolog-epilog insertion. This should return true if -print-machineinstrs + /// should print after these passes. + virtual bool addPostRegAlloc() { + return false; + } + + /// Add passes that optimize machine instructions after register allocation. + virtual void addMachineLateOptimization(); + + /// addPreSched2 - This method may be implemented by targets that want to + /// run passes after prolog-epilog insertion and before the second instruction + /// scheduling pass. This should return true if -print-machineinstrs should + /// print after these passes. + virtual bool addPreSched2() { + return false; + } + + /// Add standard basic block placement passes. + virtual void addBlockPlacement(); + + /// addPreEmitPass - This pass may be implemented by targets that want to run + /// passes immediately before machine code is emitted. This should return + /// true if -print-machineinstrs should print out the code after the passes. + virtual bool addPreEmitPass() { + return false; + } + + /// Utilities for targets to add passes to the pass manager. + /// + + /// Add a CodeGen pass at this point in the pipeline after checking overrides. + /// Return the pass that was added, or NoPassID. + AnalysisID addPass(char &ID); + + /// addMachinePasses helper to create the target-selected or overriden + /// regalloc pass. + FunctionPass *createRegAllocPass(bool Optimized); + + /// printAndVerify - Add a pass to dump then verify the machine function, if + /// those steps are enabled. + /// + void printAndVerify(const char *Banner) const; +}; +} // namespace llvm + +/// List of target independent CodeGen pass IDs. +namespace llvm { /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a /// block that cannot be reached?). 
As such, a code generator should either @@ -41,31 +246,29 @@ namespace llvm { createMachineFunctionPrinterPass(raw_ostream &OS, const std::string &Banner =""); - /// MachineLoopInfo pass - This pass is a loop analysis pass. - /// + /// MachineLoopInfo - This pass is a loop analysis pass. extern char &MachineLoopInfoID; - /// MachineLoopRanges pass - This pass is an on-demand loop coverage - /// analysis pass. - /// + /// MachineLoopRanges - This pass is an on-demand loop coverage analysis. extern char &MachineLoopRangesID; - /// MachineDominators pass - This pass is a machine dominators analysis pass. - /// + /// MachineDominators - This pass is a machine dominators analysis pass. extern char &MachineDominatorsID; /// EdgeBundles analysis - Bundle machine CFG edges. - /// extern char &EdgeBundlesID; - /// PHIElimination pass - This pass eliminates machine instruction PHI nodes + /// LiveVariables pass - This pass computes the set of blocks in which each + /// variable is life and sets machine operand kill flags. + extern char &LiveVariablesID; + + /// PHIElimination - This pass eliminates machine instruction PHI nodes /// by inserting copy instructions. This destroys SSA information, but is the /// desired input for some register allocators. This pass is "required" by /// these register allocator like this: AU.addRequiredID(PHIEliminationID); - /// extern char &PHIEliminationID; - /// StrongPHIElimination pass - This pass eliminates machine instruction PHI + /// StrongPHIElimination - This pass eliminates machine instruction PHI /// nodes by inserting copy instructions. This destroys SSA information, but /// is the desired input for some register allocators. This pass is /// "required" by these register allocator like this: @@ -76,32 +279,30 @@ namespace llvm { /// LiveStacks pass. An analysis keeping track of the liveness of stack slots. extern char &LiveStacksID; - /// TwoAddressInstruction pass - This pass reduces two-address instructions to + /// TwoAddressInstruction - This pass reduces two-address instructions to /// use two operands. This destroys SSA information but it is desired by /// register allocators. extern char &TwoAddressInstructionPassID; - /// RegisteCoalescer pass - This pass merges live ranges to eliminate copies. - extern char &RegisterCoalescerPassID; + /// ProcessImpicitDefs pass - This pass removes IMPLICIT_DEFs. + extern char &ProcessImplicitDefsID; + + /// RegisterCoalescer - This pass merges live ranges to eliminate copies. + extern char &RegisterCoalescerID; + + /// MachineScheduler - This pass schedules machine instructions. + extern char &MachineSchedulerID; /// SpillPlacement analysis. Suggest optimal placement of spill code between /// basic blocks. - /// extern char &SpillPlacementID; - /// UnreachableMachineBlockElimination pass - This pass removes unreachable + /// UnreachableMachineBlockElimination - This pass removes unreachable /// machine basic blocks. extern char &UnreachableMachineBlockElimID; - /// DeadMachineInstructionElim pass - This pass removes dead machine - /// instructions. - /// - FunctionPass *createDeadMachineInstructionElimPass(); - - /// Creates a register allocator as the user specified on the command line, or - /// picks one that matches OptLevel. - /// - FunctionPass *createRegisterAllocator(CodeGenOpt::Level OptLevel); + /// DeadMachineInstructionElim - This pass removes dead machine instructions. + extern char &DeadMachineInstructionElimID; /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. 
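// A minimal sketch, not part of this import, of how a backend might subclass
// the TargetPassConfig class declared above. "FooPassConfig" and
// "createFooISelDag" are hypothetical names; the hooks, the protected TM/PM
// members, and getOptLevel() come from the declarations above, and the
// return-value convention (true = step not handled) is assumed from the
// default implementations.
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"

namespace llvm {
// Hypothetical stand-in for a real target's instruction-selector factory.
FunctionPass *createFooISelDag(TargetMachine &TM, CodeGenOpt::Level OptLevel);
}

namespace {
class FooPassConfig : public llvm::TargetPassConfig {
public:
  FooPassConfig(llvm::TargetMachine *TM, llvm::PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  virtual bool addInstSelector() {
    // Install the target's DAG-to-DAG instruction selector.
    PM.add(llvm::createFooISelDag(*TM, getOptLevel()));
    return false; // selector installed; the default implementation returns true
  }
};
} // end anonymous namespace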
It is best suited for debug code where live ranges are short. @@ -118,56 +319,59 @@ namespace llvm { /// FunctionPass *createGreedyRegisterAllocator(); - /// LinearScanRegisterAllocation Pass - This pass implements the linear scan - /// register allocation algorithm, a global register allocator. - /// - FunctionPass *createLinearScanRegisterAllocator(); - /// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean /// Quadratic Prograaming (PBQP) based register allocator. /// FunctionPass *createDefaultPBQPRegisterAllocator(); - /// PrologEpilogCodeInserter Pass - This pass inserts prolog and epilog code, + /// PrologEpilogCodeInserter - This pass inserts prolog and epilog code, /// and eliminates abstract frame references. - /// - FunctionPass *createPrologEpilogCodeInserter(); + extern char &PrologEpilogCodeInserterID; - /// ExpandPostRAPseudos Pass - This pass expands pseudo instructions after + /// ExpandPostRAPseudos - This pass expands pseudo instructions after /// register allocation. - /// - FunctionPass *createExpandPostRAPseudosPass(); + extern char &ExpandPostRAPseudosID; /// createPostRAScheduler - This pass performs post register allocation /// scheduling. - FunctionPass *createPostRAScheduler(CodeGenOpt::Level OptLevel); + extern char &PostRASchedulerID; - /// BranchFolding Pass - This pass performs machine code CFG based + /// BranchFolding - This pass performs machine code CFG based /// optimizations to delete branches to branches, eliminate branches to /// successor blocks (creating fall throughs), and eliminating branches over /// branches. - FunctionPass *createBranchFoldingPass(bool DefaultEnableTailMerge); + extern char &BranchFolderPassID; - /// TailDuplicate Pass - Duplicate blocks with unconditional branches + /// TailDuplicate - Duplicate blocks with unconditional branches /// into tails of their predecessors. - FunctionPass *createTailDuplicatePass(bool PreRegAlloc = false); + extern char &TailDuplicateID; - /// IfConverter Pass - This pass performs machine code if conversion. - FunctionPass *createIfConverterPass(); + /// IfConverter - This pass performs machine code if conversion. + extern char &IfConverterID; - /// Code Placement Pass - This pass optimize code placement and aligns loop + /// MachineBlockPlacement - This pass places basic blocks based on branch + /// probabilities. + extern char &MachineBlockPlacementID; + + /// MachineBlockPlacementStats - This pass collects statistics about the + /// basic block placement using branch probabilities and block frequency + /// information. + extern char &MachineBlockPlacementStatsID; + + /// Code Placement - This pass optimize code placement and aligns loop /// headers to target specific alignment boundary. - FunctionPass *createCodePlacementOptPass(); + extern char &CodePlacementOptID; - /// IntrinsicLowering Pass - Performs target-independent LLVM IR - /// transformations for highly portable strategies. + /// GCLowering Pass - Performs target-independent LLVM IR transformations for + /// highly portable strategies. + /// FunctionPass *createGCLoweringPass(); - /// MachineCodeAnalysis Pass - Target-independent pass to mark safe points in - /// machine code. Must be added very late during code generation, just prior - /// to output, and importantly after all CFG transformations (such as branch - /// folding). - FunctionPass *createGCMachineCodeAnalysisPass(); + /// GCMachineCodeAnalysis - Target-independent pass to mark safe points + /// in machine code. 
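// Sketch only: the pass-by-ID idiom this hunk moves to, assumed to sit inside
// a TargetPassConfig member where the protected addPass() helper declared
// earlier in this header is in scope. "addBranchCleanupPasses" is a
// hypothetical name used purely for illustration.
void addBranchCleanupPasses() {
  // Old style asked for a pass object, e.g. PM.add(createIfConverterPass());
  // new style asks for the pass by identity, so enablePass(), disablePass()
  // and substitutePass() overrides are honored:
  addPass(IfConverterID);
  addPass(BranchFolderPassID);
}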
Must be added very late during code generation, just + /// prior to output, and importantly after all CFG transformations (such as + /// branch folding). + extern char &GCMachineCodeAnalysisID; /// Deleter Pass - Releases GC metadata. /// @@ -177,54 +381,56 @@ namespace llvm { /// FunctionPass *createGCInfoPrinter(raw_ostream &OS); - /// createMachineCSEPass - This pass performs global CSE on machine - /// instructions. - FunctionPass *createMachineCSEPass(); + /// MachineCSE - This pass performs global CSE on machine instructions. + extern char &MachineCSEID; - /// createMachineLICMPass - This pass performs LICM on machine instructions. - /// - FunctionPass *createMachineLICMPass(bool PreRegAlloc = true); + /// MachineLICM - This pass performs LICM on machine instructions. + extern char &MachineLICMID; - /// createMachineSinkingPass - This pass performs sinking on machine - /// instructions. - FunctionPass *createMachineSinkingPass(); + /// MachineSinking - This pass performs sinking on machine instructions. + extern char &MachineSinkingID; - /// createPeepholeOptimizerPass - This pass performs peephole optimizations - + /// MachineCopyPropagation - This pass performs copy propagation on + /// machine instructions. + extern char &MachineCopyPropagationID; + + /// PeepholeOptimizer - This pass performs peephole optimizations - /// like extension and comparison eliminations. - FunctionPass *createPeepholeOptimizerPass(); + extern char &PeepholeOptimizerID; - /// createOptimizePHIsPass - This pass optimizes machine instruction PHIs + /// OptimizePHIs - This pass optimizes machine instruction PHIs /// to take advantage of opportunities created during DAG legalization. - FunctionPass *createOptimizePHIsPass(); + extern char &OptimizePHIsID; - /// createStackSlotColoringPass - This pass performs stack slot coloring. - FunctionPass *createStackSlotColoringPass(bool); + /// StackSlotColoring - This pass performs stack slot coloring. + extern char &StackSlotColoringID; /// createStackProtectorPass - This pass adds stack protectors to functions. + /// FunctionPass *createStackProtectorPass(const TargetLowering *tli); /// createMachineVerifierPass - This pass verifies cenerated machine code /// instructions for correctness. + /// FunctionPass *createMachineVerifierPass(const char *Banner = 0); /// createDwarfEHPass - This pass mulches exception handling code into a form /// adapted to code generation. Required if using dwarf exception handling. FunctionPass *createDwarfEHPass(const TargetMachine *tm); - /// createSjLjEHPass - This pass adapts exception handling code to use + /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. - FunctionPass *createSjLjEHPass(const TargetLowering *tli); - - /// createLocalStackSlotAllocationPass - This pass assigns local frame - /// indices to stack slots relative to one another and allocates - /// base registers to access them when it is estimated by the target to - /// be out of range of normal frame pointer or stack pointer index - /// addressing. - FunctionPass *createLocalStackSlotAllocationPass(); - - /// createExpandISelPseudosPass - This pass expands pseudo-instructions. 
/// - FunctionPass *createExpandISelPseudosPass(); + FunctionPass *createSjLjEHPreparePass(const TargetLowering *tli); + + /// LocalStackSlotAllocation - This pass assigns local frame indices to stack + /// slots relative to one another and allocates base registers to access them + /// when it is estimated by the target to be out of range of normal frame + /// pointer or stack pointer index addressing. + extern char &LocalStackSlotAllocationID; + + /// ExpandISelPseudos - This pass expands pseudo-instructions. + extern char &ExpandISelPseudosID; /// createExecutionDependencyFixPass - This pass fixes execution time /// problems with dependent instructions, such as switching execution @@ -234,6 +440,13 @@ namespace llvm { /// FunctionPass *createExecutionDependencyFixPass(const TargetRegisterClass *RC); + /// UnpackMachineBundles - This pass unpack machine instruction bundles. + extern char &UnpackMachineBundlesID; + + /// FinalizeMachineBundles - This pass finalize machine instruction + /// bundles (created earlier, e.g. during pre-RA scheduling). + extern char &FinalizeMachineBundlesID; + } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h index 26b6773c0530..3986a8dd7da1 100644 --- a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -68,6 +68,10 @@ class RegScavenger { /// available, unset means the register is currently being used. BitVector RegsAvailable; + // These BitVectors are only used internally to forward(). They are members + // to avoid frequent reallocations. + BitVector KillRegs, DefRegs; + public: RegScavenger() : MBB(NULL), NumPhysRegs(0), Tracking(false), @@ -130,8 +134,9 @@ class RegScavenger { /// isUsed / isUnused - Test if a register is currently being used. /// - bool isUsed(unsigned Reg) const { return !RegsAvailable.test(Reg); } - bool isUnused(unsigned Reg) const { return RegsAvailable.test(Reg); } + bool isUsed(unsigned Reg) const { + return !RegsAvailable.test(Reg) || ReservedRegs.test(Reg); + } /// isAliasUsed - Is Reg or an alias currently in use? bool isAliasUsed(unsigned Reg) const; @@ -139,7 +144,7 @@ class RegScavenger { /// setUsed / setUnused - Mark the state of one or a number of registers. /// void setUsed(BitVector &Regs) { - RegsAvailable &= ~Regs; + RegsAvailable.reset(Regs); } void setUnused(BitVector &Regs) { RegsAvailable |= Regs; @@ -148,9 +153,6 @@ class RegScavenger { /// Add Reg and all its sub-registers to BV. void addRegWithSubRegs(BitVector &BV, unsigned Reg); - /// Add Reg and its aliases to BV. - void addRegWithAliases(BitVector &BV, unsigned Reg); - /// findSurvivorReg - Return the candidate register that is unused for the /// longest after StartMI. UseMI is set to the instruction where the search /// stopped. diff --git a/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h b/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h new file mode 100644 index 000000000000..56b5855c01c9 --- /dev/null +++ b/contrib/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -0,0 +1,142 @@ +//===----- ResourcePriorityQueue.h - A DFA-oriented priority queue -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that schedules using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// +//===----------------------------------------------------------------------===// + +#ifndef RESOURCE_PRIORITY_QUEUE_H +#define RESOURCE_PRIORITY_QUEUE_H + +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class ResourcePriorityQueue; + + /// Sorting functions for the Available queue. + struct resource_sort : public std::binary_function { + ResourcePriorityQueue *PQ; + explicit resource_sort(ResourcePriorityQueue *pq) : PQ(pq) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + class ResourcePriorityQueue : public SchedulingPriorityQueue { + /// SUnits - The SUnits for the current graph. + std::vector *SUnits; + + /// NumNodesSolelyBlocking - This vector contains, for every node in the + /// Queue, the number of nodes that the node is the sole unscheduled + /// predecessor for. This is used as a tie-breaker heuristic for better + /// mobility. + std::vector NumNodesSolelyBlocking; + + /// Queue - The queue. + std::vector Queue; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. + std::vector RegLimit; + + resource_sort Picker; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + const TargetInstrInfo *TII; + const InstrItineraryData* InstrItins; + /// ResourcesModel - Represents VLIW state. + /// Not limited to VLIW targets per say, but assumes + /// definition of DFA by a target. + DFAPacketizer *ResourcesModel; + + /// Resource model - packet/bundle model. Purely + /// internal at the time. + std::vector Packet; + + /// Heuristics for estimating register pressure. + unsigned ParallelLiveRanges; + signed HorizontalVerticalBalance; + + public: + ResourcePriorityQueue(SelectionDAGISel *IS); + + ~ResourcePriorityQueue() { + delete ResourcesModel; + } + + bool isBottomUp() const { return false; } + + void initNodes(std::vector &sunits); + + void addNode(const SUnit *SU) { + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + } + + void updateNode(const SUnit *SU) {} + + void releaseState() { + SUnits = 0; + } + + unsigned getLatency(unsigned NodeNum) const { + assert(NodeNum < (*SUnits).size()); + return (*SUnits)[NodeNum].getHeight(); + } + + unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { + assert(NodeNum < NumNodesSolelyBlocking.size()); + return NumNodesSolelyBlocking[NodeNum]; + } + + /// Single cost function reflecting benefit of scheduling SU + /// in the current cycle. + signed SUSchedulingCost (SUnit *SU); + + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. 
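// Not from the patch: a self-contained illustration of the pattern the
// Available queue above relies on, a strict-weak-ordering functor
// (resource_sort there, item_sort here) driving a heap-organized vector.
// Whether the real queue uses std::push_heap or a linear scan is an
// implementation detail not shown in this header.
#include <algorithm>
#include <cassert>
#include <vector>

struct Item { int cost; };

// Plays the role of resource_sort: decides which of two items should be
// scheduled first (here, simply the larger cost wins).
struct item_sort {
  bool operator()(const Item *L, const Item *R) const {
    return L->cost < R->cost;
  }
};

int main() {
  Item A = {3}, B = {7}, C = {5};
  Item *All[] = {&A, &B, &C};
  std::vector<Item*> Queue;
  item_sort Picker;

  for (unsigned i = 0; i != 3; ++i) {                  // like push()
    Queue.push_back(All[i]);
    std::push_heap(Queue.begin(), Queue.end(), Picker);
  }

  std::pop_heap(Queue.begin(), Queue.end(), Picker);   // like pop()
  Item *Best = Queue.back();
  Queue.pop_back();

  assert(Best->cost == 7 && "highest-priority item comes off first");
  (void)Best;
  return 0;
}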
+ /// + void initNumRegDefsLeft(SUnit *SU); + void updateNumRegDefsLeft(SUnit *SU); + signed regPressureDelta(SUnit *SU, bool RawPressure = false); + signed rawRegPressureDelta (SUnit *SU, unsigned RCId); + + bool empty() const { return Queue.empty(); } + + virtual void push(SUnit *U); + + virtual SUnit *pop(); + + virtual void remove(SUnit *SU); + + virtual void dump(ScheduleDAG* DAG) const; + + /// scheduledNode - Main resource tracking point. + void scheduledNode(SUnit *Node); + bool isResourceAvailable(SUnit *SU); + void reserveResources(SUnit *SU); + +private: + void adjustPriorityOfUnscheduledPreds(SUnit *SU); + SUnit *getSingleUnscheduledPred(SUnit *SU); + unsigned numberRCValPredInSU (SUnit *SU, unsigned RCId); + unsigned numberRCValSuccInSU (SUnit *SU, unsigned RCId); + }; +} + +#endif diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h index 1bbc6c54d7fa..f4de6933b317 100644 --- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -8,7 +8,8 @@ //===----------------------------------------------------------------------===// // // This file implements the ScheduleDAG class, which is used as the common -// base class for instruction schedulers. +// base class for instruction schedulers. This encapsulates the scheduling DAG, +// which is shared between SelectionDAG and MachineInstr scheduling. // //===----------------------------------------------------------------------===// @@ -16,7 +17,7 @@ #define LLVM_CODEGEN_SCHEDULEDAG_H #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/GraphTraits.h" @@ -129,8 +130,7 @@ namespace llvm { Contents.Order.isMustAlias == Other.Contents.Order.isMustAlias && Contents.Order.isArtificial == Other.Contents.Order.isArtificial; } - assert(0 && "Invalid dependency kind!"); - return false; + llvm_unreachable("Invalid dependency kind!"); } bool operator!=(const SDep &Other) const { @@ -232,6 +232,7 @@ namespace llvm { public: SUnit *OrigNode; // If not this, the node from which // this node was cloned. + // (SD scheduling only) // Preds/Succs - The SUnits before/after us in the graph. SmallVector Preds; // All sunit predecessors. @@ -409,6 +410,13 @@ namespace llvm { return false; } + bool isTopReady() const { + return NumPredsLeft == 0; + } + bool isBottomReady() const { + return NumSuccsLeft == 0; + } + void dump(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const; void print(raw_ostream &O, const ScheduleDAG *G) const; @@ -427,6 +435,7 @@ namespace llvm { /// implementation to decide. /// class SchedulingPriorityQueue { + virtual void anchor(); unsigned CurCycle; bool HasReadyFilter; public: @@ -465,13 +474,13 @@ namespace llvm { virtual void dump(ScheduleDAG *) const {} - /// ScheduledNode - As each node is scheduled, this method is invoked. This + /// scheduledNode - As each node is scheduled, this method is invoked. This /// allows the priority function to adjust the priority of related /// unscheduled nodes, for example. 
/// - virtual void ScheduledNode(SUnit *) {} + virtual void scheduledNode(SUnit *) {} - virtual void UnscheduledNode(SUnit *) {} + virtual void unscheduledNode(SUnit *) {} void setCurCycle(unsigned Cycle) { CurCycle = Cycle; @@ -484,15 +493,11 @@ namespace llvm { class ScheduleDAG { public: - MachineBasicBlock *BB; // The block in which to insert instructions - MachineBasicBlock::iterator InsertPos;// The position to insert instructions const TargetMachine &TM; // Target processor const TargetInstrInfo *TII; // Target instruction information const TargetRegisterInfo *TRI; // Target processor register info MachineFunction &MF; // Machine function MachineRegisterInfo &MRI; // Virtual/real register map - std::vector Sequence; // The schedule. Null SUnit*'s - // represent noop instructions. std::vector SUnits; // The scheduling units. SUnit EntrySU; // Special node for the region entry. SUnit ExitSU; // Special node for the region exit. @@ -507,6 +512,9 @@ namespace llvm { virtual ~ScheduleDAG(); + /// clearDAG - clear the DAG state (between regions). + void clearDAG(); + /// getInstrDesc - Return the MCInstrDesc of this SUnit. /// Return NULL for SDNodes without a machine opcode. const MCInstrDesc *getInstrDesc(const SUnit *SU) const { @@ -517,66 +525,43 @@ namespace llvm { /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered /// using 'dot'. /// + void viewGraph(const Twine &Name, const Twine &Title); void viewGraph(); - /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock - /// according to the order specified in Sequence. - /// - virtual MachineBasicBlock *EmitSchedule() = 0; - - void dumpSchedule() const; - virtual void dumpNode(const SUnit *SU) const = 0; /// getGraphNodeLabel - Return a label for an SUnit node in a visualization /// of the ScheduleDAG. virtual std::string getGraphNodeLabel(const SUnit *SU) const = 0; + /// getDAGLabel - Return a label for the region of code covered by the DAG. + virtual std::string getDAGName() const = 0; + /// addCustomGraphFeatures - Add custom features for a visualization of /// the ScheduleDAG. virtual void addCustomGraphFeatures(GraphWriter &) const {} #ifndef NDEBUG - /// VerifySchedule - Verify that all SUnits were scheduled and that - /// their state is consistent. - void VerifySchedule(bool isBottomUp); + /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that + /// their state is consistent. Return the number of scheduled SUnits. + unsigned VerifyScheduledDAG(bool isBottomUp); #endif protected: - /// Run - perform scheduling. - /// - void Run(MachineBasicBlock *bb, MachineBasicBlock::iterator insertPos); - - /// BuildSchedGraph - Build SUnits and set up their Preds and Succs - /// to form the scheduling dependency graph. - /// - virtual void BuildSchedGraph(AliasAnalysis *AA) = 0; - /// ComputeLatency - Compute node latency. /// - virtual void ComputeLatency(SUnit *SU) = 0; + virtual void computeLatency(SUnit *SU) = 0; /// ComputeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *, SUnit *, + virtual void computeOperandLatency(SUnit *, SUnit *, SDep&) const { } - /// Schedule - Order nodes according to selected style, filling - /// in the Sequence member. - /// - virtual void Schedule() = 0; - /// ForceUnitLatencies - Return true if all scheduling edges should be given /// a latency value of one. The default is to return false; schedulers may /// override this as needed. 
- virtual bool ForceUnitLatencies() const { return false; } - - /// EmitNoop - Emit a noop instruction. - /// - void EmitNoop(); - - void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap); + virtual bool forceUnitLatencies() const { return false; } private: // Return the MCInstrDesc of this SDNode or NULL. diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h new file mode 100644 index 000000000000..c8de7bc8f892 --- /dev/null +++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -0,0 +1,344 @@ +//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGInstrs class, which implements +// scheduling for a MachineInstr-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGINSTRS_H +#define SCHEDULEDAGINSTRS_H + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SparseSet.h" +#include + +namespace llvm { + class MachineLoopInfo; + class MachineDominatorTree; + class LiveIntervals; + + /// LoopDependencies - This class analyzes loop-oriented register + /// dependencies, which are used to guide scheduling decisions. + /// For example, loop induction variable increments should be + /// scheduled as soon as possible after the variable's last use. + /// + class LoopDependencies { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + + public: + typedef std::map > + LoopDeps; + LoopDeps Deps; + + LoopDependencies(const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) : + MLI(mli), MDT(mdt) {} + + /// VisitLoop - Clear out any previous state and analyze the given loop. + /// + void VisitLoop(const MachineLoop *Loop) { + assert(Deps.empty() && "stale loop dependencies"); + + MachineBasicBlock *Header = Loop->getHeader(); + SmallSet LoopLiveIns; + for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), + LE = Header->livein_end(); LI != LE; ++LI) + LoopLiveIns.insert(*LI); + + const MachineDomTreeNode *Node = MDT.getNode(Header); + const MachineBasicBlock *MBB = Node->getBlock(); + assert(Loop->contains(MBB) && + "Loop does not contain header!"); + VisitRegion(Node, MBB, Loop, LoopLiveIns); + } + + private: + void VisitRegion(const MachineDomTreeNode *Node, + const MachineBasicBlock *MBB, + const MachineLoop *Loop, + const SmallSet &LoopLiveIns) { + unsigned Count = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (LoopLiveIns.count(MOReg)) + Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); + } + ++Count; // Not every iteration due to dbg_value above. 
+ } + + const std::vector &Children = Node->getChildren(); + for (std::vector::const_iterator I = + Children.begin(), E = Children.end(); I != E; ++I) { + const MachineDomTreeNode *ChildNode = *I; + MachineBasicBlock *ChildBlock = ChildNode->getBlock(); + if (Loop->contains(ChildBlock)) + VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); + } + } + }; + + /// An individual mapping from virtual register number to SUnit. + struct VReg2SUnit { + unsigned VirtReg; + SUnit *SU; + + VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} + + unsigned getSparseSetKey() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } + }; + + /// Combine a SparseSet with a 1x1 vector to track physical registers. + /// The SparseSet allows iterating over the (few) live registers for quickly + /// comparing against a regmask or clearing the set. + /// + /// Storage for the map is allocated once for the pass. The map can be + /// cleared between scheduling regions without freeing unused entries. + class Reg2SUnitsMap { + SparseSet PhysRegSet; + std::vector > SUnits; + public: + typedef SparseSet::const_iterator const_iterator; + + // Allow iteration over register numbers (keys) in the map. If needed, we + // can provide an iterator over SUnits (values) as well. + const_iterator reg_begin() const { return PhysRegSet.begin(); } + const_iterator reg_end() const { return PhysRegSet.end(); } + + /// Initialize the map with the number of registers. + /// If the map is already large enough, no allocation occurs. + /// For simplicity we expect the map to be empty(). + void setRegLimit(unsigned Limit); + + /// Returns true if the map is empty. + bool empty() const { return PhysRegSet.empty(); } + + /// Clear the map without deallocating storage. + void clear(); + + bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); } + + /// If this register is mapped, return its existing SUnits vector. + /// Otherwise map the register and return an empty SUnits vector. + std::vector &operator[](unsigned Reg) { + bool New = PhysRegSet.insert(Reg).second; + assert((!New || SUnits[Reg].empty()) && "stale SUnits vector"); + (void)New; + return SUnits[Reg]; + } + + /// Erase an existing element without freeing memory. + void erase(unsigned Reg) { + PhysRegSet.erase(Reg); + SUnits[Reg].clear(); + } + }; + + /// Use SparseSet as a SparseMap by relying on the fact that it never + /// compares ValueT's, only unsigned keys. This allows the set to be cleared + /// between scheduling regions in constant time as long as ValueT does not + /// require a destructor. + typedef SparseSet VReg2SUnitMap; + + /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of + /// MachineInstrs. + class ScheduleDAGInstrs : public ScheduleDAG { + protected: + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + const MachineFrameInfo *MFI; + const InstrItineraryData *InstrItins; + + /// Live Intervals provides reaching defs in preRA scheduling. + LiveIntervals *LIS; + + /// isPostRA flag indicates vregs cannot be present. + bool IsPostRA; + + /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using + /// the def-side latency only. + bool UnitLatencies; + + /// State specific to the current scheduling region. + /// ------------------------------------------------ + + /// The block in which to insert instructions + MachineBasicBlock *BB; + + /// The beginning of the range to be scheduled. + MachineBasicBlock::iterator RegionBegin; + + /// The end of the range to be scheduled. 
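// Not from the patch: a sketch of how the Reg2SUnitsMap declared above is
// meant to be used while walking a region. The element type behind
// operator[] is assumed to be a vector of SUnit pointers (the listing above
// lost its template arguments); "Defs" plays the role of the
// ScheduleDAGInstrs member of the same name.
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
using namespace llvm;

// Prepare the map for a new function: size it for the target's registers.
// setRegLimit() expects an empty map, so clear() first.
static void resetPhysRegDefs(Reg2SUnitsMap &Defs, unsigned NumPhysRegs) {
  Defs.clear();
  Defs.setRegLimit(NumPhysRegs);
}

// Record that SU defines physical register Reg; operator[] maps the register
// on first use and hands back its (possibly empty) SUnit list.
static void recordPhysRegDef(Reg2SUnitsMap &Defs, unsigned Reg, SUnit *SU) {
  Defs[Reg].push_back(SU);
}

// Forget a register once its defs have been wired up, keeping the storage.
static void retirePhysReg(Reg2SUnitsMap &Defs, unsigned Reg) {
  if (Defs.contains(Reg))
    Defs.erase(Reg);
}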
+ MachineBasicBlock::iterator RegionEnd; + + /// The index in BB of RegionEnd. + unsigned EndIndex; + + /// After calling BuildSchedGraph, each machine instruction in the current + /// scheduling region is mapped to an SUnit. + DenseMap MISUnitMap; + + /// State internal to DAG building. + /// ------------------------------- + + /// Defs, Uses - Remember where defs and uses of each register are as we + /// iterate upward through the instructions. This is allocated here instead + /// of inside BuildSchedGraph to avoid the need for it to be initialized and + /// destructed for each block. + Reg2SUnitsMap Defs; + Reg2SUnitsMap Uses; + + /// Track the last instructon in this region defining each virtual register. + VReg2SUnitMap VRegDefs; + + /// PendingLoads - Remember where unknown loads are after the most recent + /// unknown store, as we iterate. As with Defs and Uses, this is here + /// to minimize construction/destruction. + std::vector PendingLoads; + + /// LoopRegs - Track which registers are used for loop-carried dependencies. + /// + LoopDependencies LoopRegs; + + /// DbgValues - Remember instruction that preceeds DBG_VALUE. + /// These are generated by buildSchedGraph but persist so they can be + /// referenced when emitting the final schedule. + typedef std::vector > + DbgValueVector; + DbgValueVector DbgValues; + MachineInstr *FirstDbgValue; + + public: + explicit ScheduleDAGInstrs(MachineFunction &mf, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt, + bool IsPostRAFlag, + LiveIntervals *LIS = 0); + + virtual ~ScheduleDAGInstrs() {} + + /// begin - Return an iterator to the top of the current scheduling region. + MachineBasicBlock::iterator begin() const { return RegionBegin; } + + /// end - Return an iterator to the bottom of the current scheduling region. + MachineBasicBlock::iterator end() const { return RegionEnd; } + + /// newSUnit - Creates a new SUnit and return a ptr to it. + SUnit *newSUnit(MachineInstr *MI); + + /// getSUnit - Return an existing SUnit for this MI, or NULL. + SUnit *getSUnit(MachineInstr *MI) const; + + /// startBlock - Prepare to perform scheduling in the given block. + virtual void startBlock(MachineBasicBlock *BB); + + /// finishBlock - Clean up after scheduling in the given block. + virtual void finishBlock(); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); + + /// buildSchedGraph - Build SUnits from the MachineBasicBlock that we are + /// input. + void buildSchedGraph(AliasAnalysis *AA); + + /// addSchedBarrierDeps - Add dependencies from instructions in the current + /// list of instructions being scheduled to scheduling barrier. We want to + /// make sure instructions which define registers that are either used by + /// the terminator or are live-out are properly scheduled. This is + /// especially important when the definition latency of the return value(s) + /// are too high to be hidden by the branch or when the liveout registers + /// used by instructions in the fallthrough block. + void addSchedBarrierDeps(); + + /// computeLatency - Compute node latency. 
+ /// + virtual void computeLatency(SUnit *SU); + + /// computeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const; + + /// schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + /// Typically, a scheduling algorithm will implement schedule() without + /// overriding enterRegion() or exitRegion(). + virtual void schedule() = 0; + + /// finalizeSchedule - Allow targets to perform final scheduling actions at + /// the level of the whole MachineFunction. By default does nothing. + virtual void finalizeSchedule() {} + + virtual void dumpNode(const SUnit *SU) const; + + /// Return a label for a DAG node that points to an instruction. + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + /// Return a label for the region of code covered by the DAG. + virtual std::string getDAGName() const; + + protected: + void initSUnits(); + void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO); + void addPhysRegDeps(SUnit *SU, unsigned OperIdx); + void addVRegDefDeps(SUnit *SU, unsigned OperIdx); + void addVRegUseDeps(SUnit *SU, unsigned OperIdx); + + VReg2SUnitMap::iterator findVRegDef(unsigned VirtReg) { + return VRegDefs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + }; + + /// newSUnit - Creates a new SUnit and return a ptr to it. + inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { +#ifndef NDEBUG + const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0]; +#endif + SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + return &SUnits.back(); + } + + /// getSUnit - Return an existing SUnit for this MI, or NULL. + inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { + DenseMap::const_iterator I = MISUnitMap.find(MI); + if (I == MISUnitMap.end()) + return 0; + return I->second; + } +} // namespace llvm + +#endif diff --git a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h index 96573dd5d8b1..a582b0c40c8b 100644 --- a/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h +++ b/contrib/llvm/include/llvm/CodeGen/SchedulerRegistry.h @@ -42,7 +42,7 @@ class RegisterScheduler : public MachinePassRegistryNode { : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { Registry.Add(this); } ~RegisterScheduler() { Registry.Remove(this); } - + // Accessors. // @@ -68,11 +68,6 @@ class RegisterScheduler : public MachinePassRegistryNode { ScheduleDAGSDNodes *createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel); -/// createTDRRListDAGScheduler - This creates a top down register usage -/// reduction list scheduler. -ScheduleDAGSDNodes *createTDRRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel); - /// createBURRListDAGScheduler - This creates a bottom up list scheduler that /// schedules nodes in source code order when possible. ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS, @@ -91,16 +86,17 @@ ScheduleDAGSDNodes *createHybridListDAGScheduler(SelectionDAGISel *IS, /// to reduce register pressure. ScheduleDAGSDNodes *createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level); -/// createTDListDAGScheduler - This creates a top-down list scheduler with -/// a hazard recognizer. 
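// A sketch, not part of this import, of the calling sequence the region API
// above is designed for. "Impl" is assumed to be some concrete
// ScheduleDAGInstrs subclass whose schedule() builds the graph (typically via
// buildSchedGraph()) and reorders the region; here one region covers the
// whole block.
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
using namespace llvm;

static void scheduleWholeBlock(ScheduleDAGInstrs &Impl,
                               MachineBasicBlock *MBB) {
  Impl.startBlock(MBB);
  Impl.enterRegion(MBB, MBB->begin(), MBB->end(), MBB->size());
  Impl.schedule();     // concrete scheduler orders the SUnits
  Impl.exitRegion();
  Impl.finishBlock();
}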
-ScheduleDAGSDNodes *createTDListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel); /// createFastDAGScheduler - This creates a "fast" scheduler. /// ScheduleDAGSDNodes *createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel); +/// createVLIWDAGScheduler - Scheduler for VLIW targets. This creates top down +/// DFA driven list scheduler with clustering heuristic to control +/// register pressure. +ScheduleDAGSDNodes *createVLIWDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel); /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS, diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h index 132983c504e7..6a7a87e86636 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -51,7 +51,7 @@ template<> struct ilist_traits : public ilist_default_traits { static void noteHead(SDNode*, SDNode*) {} static void deleteNode(SDNode *) { - assert(0 && "ilist_traits shouldn't see a deleteNode call!"); + llvm_unreachable("ilist_traits shouldn't see a deleteNode call!"); } private: static void createNode(const SDNode &); @@ -112,9 +112,10 @@ class SDDbgInfo { }; enum CombineLevel { - Unrestricted, // Combine may create illegal operations and illegal types. - NoIllegalTypes, // Combine may create illegal operations but no illegal types. - NoIllegalOperations // Combine may only create legal operations and types. + BeforeLegalizeTypes, + AfterLegalizeTypes, + AfterLegalizeVectorOps, + AfterLegalizeDAG }; class SelectionDAG; @@ -138,6 +139,7 @@ class SelectionDAG { const TargetSelectionDAGInfo &TSI; MachineFunction *MF; LLVMContext *Context; + CodeGenOpt::Level OptLevel; /// EntryNode - The starting token. SDNode EntryNode; @@ -186,7 +188,7 @@ class SelectionDAG { SelectionDAG(const SelectionDAG&); // Do not implement. 
public: - explicit SelectionDAG(const TargetMachine &TM); + explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level); ~SelectionDAG(); /// init - Prepare this SelectionDAG to process code in the given @@ -392,6 +394,7 @@ class SelectionDAG { unsigned char TargetFlags = 0); SDValue getValueType(EVT); SDValue getRegister(unsigned Reg, EVT VT); + SDValue getRegisterMask(const uint32_t *RegMask); SDValue getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label); SDValue getBlockAddress(const BlockAddress *BA, EVT VT, bool isTarget = false, unsigned char TargetFlags = 0); @@ -650,8 +653,8 @@ class SelectionDAG { /// SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, - bool isNonTemporal, unsigned Alignment, - const MDNode *TBAAInfo = 0); + bool isNonTemporal, bool isInvariant, unsigned Alignment, + const MDNode *TBAAInfo = 0, const MDNode *Ranges = 0); SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, @@ -663,8 +666,9 @@ class SelectionDAG { EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, unsigned Alignment, - const MDNode *TBAAInfo = 0); + bool isVolatile, bool isNonTemporal, bool isInvariant, + unsigned Alignment, const MDNode *TBAAInfo = 0, + const MDNode *Ranges = 0); SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, @@ -976,8 +980,8 @@ class SelectionDAG { /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. Targets can implement the computeMaskedBitsForTargetNode /// method in the TargetLowering class to allow target nodes to be understood. - void ComputeMaskedBits(SDValue Op, const APInt &Mask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0) const; + void ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, + unsigned Depth = 0) const; /// ComputeNumSignBits - Return the number of times the sign bit of the /// register is replicated into the other bits. We know that at least 1 bit @@ -1033,6 +1037,7 @@ class SelectionDAG { void *&InsertPos); SDNode *FindModifiedNodeSlot(SDNode *N, const SDValue *Ops, unsigned NumOps, void *&InsertPos); + SDNode *UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc loc); void DeleteNodeNotInCSEMaps(SDNode *N); void DeallocateNode(SDNode *N); diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h index ecf394701053..ee3f2319c0b3 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -29,6 +29,7 @@ namespace llvm { class MachineFunction; class MachineInstr; class TargetLowering; + class TargetLibraryInfo; class TargetInstrInfo; class FunctionLoweringInfo; class ScheduleHazardRecognizer; @@ -42,6 +43,7 @@ class SelectionDAGISel : public MachineFunctionPass { public: const TargetMachine &TM; const TargetLowering &TLI; + const TargetLibraryInfo *LibInfo; FunctionLoweringInfo *FuncInfo; MachineFunction *MF; MachineRegisterInfo *RegInfo; @@ -92,7 +94,7 @@ class SelectionDAGISel : public MachineFunctionPass { /// IsLegalToFold - Returns true if the specific operand node N of /// U can be folded during instruction selection that starts at Root. 
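// Sketch of a caller updated for the ComputeMaskedBits() change above: the
// explicit Mask argument is gone, and the known-zero/known-one bit vectors
// are simply sized to the value's width. "DAG" and "Op" would already be in
// scope at a real call site; here they are parameters.
#include "llvm/CodeGen/SelectionDAG.h"

static bool signBitKnownZero(llvm::SelectionDAG &DAG, llvm::SDValue Op) {
  using namespace llvm;
  unsigned BitWidth = Op.getValueType().getSizeInBits();
  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
  // Before: DAG.ComputeMaskedBits(Op, APInt::getAllOnesValue(BitWidth),
  //                               KnownZero, KnownOne);
  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
  return KnownZero[BitWidth - 1]; // sign bit proven zero?
}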
- /// FIXME: This is a static member function because the MSP430/SystemZ/X86 + /// FIXME: This is a static member function because the MSP430/X86 /// targets, which uses it during isel. This could become a proper member. static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOpt::Level OptLevel, @@ -179,6 +181,7 @@ class SelectionDAGISel : public MachineFunctionPass { /// ISelUpdater - helper class to handle updates of the /// instruction selection graph. class ISelUpdater : public SelectionDAG::DAGUpdateListener { + virtual void anchor(); SelectionDAG::allnodes_iterator &ISelPosition; public: explicit ISelUpdater(SelectionDAG::allnodes_iterator &isp) @@ -237,8 +240,7 @@ class SelectionDAGISel : public MachineFunctionPass { /// succeeds or false if it fails. The number is a private implementation /// detail to the code tblgen produces. virtual bool CheckPatternPredicate(unsigned PredNo) const { - assert(0 && "Tblgen should generate the implementation of this!"); - return 0; + llvm_unreachable("Tblgen should generate the implementation of this!"); } /// CheckNodePredicate - This function is generated by tblgen in the target. @@ -246,20 +248,17 @@ class SelectionDAGISel : public MachineFunctionPass { /// false if it fails. The number is a private implementation /// detail to the code tblgen produces. virtual bool CheckNodePredicate(SDNode *N, unsigned PredNo) const { - assert(0 && "Tblgen should generate the implementation of this!"); - return 0; + llvm_unreachable("Tblgen should generate the implementation of this!"); } virtual bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N, unsigned PatternNo, SmallVectorImpl > &Result) { - assert(0 && "Tblgen should generate the implementation of this!"); - return false; + llvm_unreachable("Tblgen should generate the implementation of this!"); } virtual SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) { - assert(0 && "Tblgen should generate this!"); - return SDValue(); + llvm_unreachable("Tblgen should generate this!"); } SDNode *SelectCodeCommon(SDNode *NodeToMatch, diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 6c7be69b4d2f..f8248b845337 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -917,12 +917,13 @@ class MemSDNode : public SDNode { // with MachineMemOperand information. bool isVolatile() const { return (SubclassData >> 5) & 1; } bool isNonTemporal() const { return (SubclassData >> 6) & 1; } + bool isInvariant() const { return (SubclassData >> 7) & 1; } AtomicOrdering getOrdering() const { - return AtomicOrdering((SubclassData >> 7) & 15); + return AtomicOrdering((SubclassData >> 8) & 15); } SynchronizationScope getSynchScope() const { - return SynchronizationScope((SubclassData >> 11) & 1); + return SynchronizationScope((SubclassData >> 12) & 1); } /// Returns the SrcValue and offset that describes the location of the access @@ -932,6 +933,9 @@ class MemSDNode : public SDNode { /// Returns the TBAAInfo that describes the dereference. const MDNode *getTBAAInfo() const { return MMO->getTBAAInfo(); } + /// Returns the Ranges that describes the dereference. + const MDNode *getRanges() const { return MMO->getRanges(); } + /// getMemoryVT - Return the type of the in-memory value. 
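// Sketch of a caller updated for the ShuffleVectorSDNode::getMask() change
// above: it now returns a lightweight ArrayRef view of the mask instead of
// filling a caller-provided SmallVector (the element type, stripped in the
// listing above, is taken to be int). "SVN" is any shuffle node.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

static unsigned countUndefLanes(const llvm::ShuffleVectorSDNode *SVN) {
  using namespace llvm;
  // Before: SmallVector<int, 8> Mask; SVN->getMask(Mask);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned N = 0;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] < 0) // negative shuffle-mask entries denote undef lanes
      ++N;
  return N;
}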
EVT getMemoryVT() const { return MemoryVT; } @@ -993,8 +997,8 @@ class AtomicSDNode : public MemSDNode { "Ordering may not require more than 4 bits!"); assert((SynchScope & 1) == SynchScope && "SynchScope may not require more than 1 bit!"); - SubclassData |= Ordering << 7; - SubclassData |= SynchScope << 11; + SubclassData |= Ordering << 8; + SubclassData |= SynchScope << 12; assert(getOrdering() == Ordering && "Ordering encoding error!"); assert(getSynchScope() == SynchScope && "Synch-scope encoding error!"); @@ -1113,11 +1117,9 @@ class ShuffleVectorSDNode : public SDNode { } public: - void getMask(SmallVectorImpl &M) const { + ArrayRef getMask() const { EVT VT = getValueType(0); - M.clear(); - for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) - M.push_back(Mask[i]); + return makeArrayRef(Mask, VT.getVectorNumElements()); } int getMaskElt(unsigned Idx) const { assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!"); @@ -1434,6 +1436,23 @@ class RegisterSDNode : public SDNode { } }; +class RegisterMaskSDNode : public SDNode { + // The memory for RegMask is not owned by the node. + const uint32_t *RegMask; + friend class SelectionDAG; + RegisterMaskSDNode(const uint32_t *mask) + : SDNode(ISD::RegisterMask, DebugLoc(), getSDVTList(MVT::Untyped)), + RegMask(mask) {} +public: + + const uint32_t *getRegMask() const { return RegMask; } + + static bool classof(const RegisterMaskSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::RegisterMask; + } +}; + class BlockAddressSDNode : public SDNode { const BlockAddress *BA; unsigned char TargetFlags; @@ -1684,6 +1703,8 @@ class MachineSDNode : public SDNode { /// setMemRefs - Assign this MachineSDNodes's memory reference descriptor /// list. This does not transfer ownership. void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) { + for (mmo_iterator MMI = NewMemRefs, MME = NewMemRefsEnd; MMI != MME; ++MMI) + assert(*MMI && "Null mem ref detected!"); MemRefs = NewMemRefs; MemRefsEnd = NewMemRefsEnd; } diff --git a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h index 2d98864dc9ed..d868cb8dade8 100644 --- a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -19,7 +19,7 @@ #ifndef LLVM_CODEGEN_SLOTINDEXES_H #define LLVM_CODEGEN_SLOTINDEXES_H -#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/PointerIntPair.h" @@ -83,7 +83,29 @@ namespace llvm { friend class SlotIndexes; friend struct DenseMapInfo; - enum Slot { LOAD, USE, DEF, STORE, NUM }; + enum Slot { + /// Basic block boundary. Used for live ranges entering and leaving a + /// block without being live in the layout neighbor. Also used as the + /// def slot of PHI-defs. + Slot_Block, + + /// Early-clobber register use/def slot. A live range defined at + /// Slot_EarlyCLobber interferes with normal live ranges killed at + /// Slot_Register. Also used as the kill slot for live ranges tied to an + /// early-clobber def. + Slot_EarlyClobber, + + /// Normal register use/def slot. Normal instructions kill and define + /// register live ranges at this slot. + Slot_Register, + + /// Dead def kill point. Kill slot for a live range that is defined by + /// the same instruction (Slot_Register or Slot_EarlyClobber), but isn't + /// used anywhere. 
+ Slot_Dead, + + Slot_Count + }; PointerIntPair lie; @@ -113,7 +135,7 @@ namespace llvm { enum { /// The default distance between instructions as returned by distance(). /// This may vary as instructions are inserted and removed. - InstrDist = 4*NUM + InstrDist = 4 * Slot_Count }; static inline SlotIndex getEmptyKey() { @@ -186,69 +208,55 @@ namespace llvm { return A.lie.getPointer() == B.lie.getPointer(); } + /// isEarlierInstr - Return true if A refers to an instruction earlier than + /// B. This is equivalent to A < B && !isSameInstr(A, B). + static bool isEarlierInstr(SlotIndex A, SlotIndex B) { + return A.entry().getIndex() < B.entry().getIndex(); + } + /// Return the distance from this index to the given one. int distance(SlotIndex other) const { return other.getIndex() - getIndex(); } - /// isLoad - Return true if this is a LOAD slot. - bool isLoad() const { - return getSlot() == LOAD; - } + /// isBlock - Returns true if this is a block boundary slot. + bool isBlock() const { return getSlot() == Slot_Block; } - /// isDef - Return true if this is a DEF slot. - bool isDef() const { - return getSlot() == DEF; - } + /// isEarlyClobber - Returns true if this is an early-clobber slot. + bool isEarlyClobber() const { return getSlot() == Slot_EarlyClobber; } - /// isUse - Return true if this is a USE slot. - bool isUse() const { - return getSlot() == USE; - } + /// isRegister - Returns true if this is a normal register use/def slot. + /// Note that early-clobber slots may also be used for uses and defs. + bool isRegister() const { return getSlot() == Slot_Register; } - /// isStore - Return true if this is a STORE slot. - bool isStore() const { - return getSlot() == STORE; - } + /// isDead - Returns true if this is a dead def kill slot. + bool isDead() const { return getSlot() == Slot_Dead; } /// Returns the base index for associated with this index. The base index - /// is the one associated with the LOAD slot for the instruction pointed to - /// by this index. + /// is the one associated with the Slot_Block slot for the instruction + /// pointed to by this index. SlotIndex getBaseIndex() const { - return getLoadIndex(); + return SlotIndex(&entry(), Slot_Block); } /// Returns the boundary index for associated with this index. The boundary - /// index is the one associated with the LOAD slot for the instruction + /// index is the one associated with the Slot_Block slot for the instruction /// pointed to by this index. SlotIndex getBoundaryIndex() const { - return getStoreIndex(); + return SlotIndex(&entry(), Slot_Dead); } - /// Returns the index of the LOAD slot for the instruction pointed to by - /// this index. - SlotIndex getLoadIndex() const { - return SlotIndex(&entry(), SlotIndex::LOAD); - } - - /// Returns the index of the USE slot for the instruction pointed to by - /// this index. - SlotIndex getUseIndex() const { - return SlotIndex(&entry(), SlotIndex::USE); + /// Returns the register use/def slot in the current instruction for a + /// normal or early-clobber def. + SlotIndex getRegSlot(bool EC = false) const { + return SlotIndex(&entry(), EC ? Slot_EarlyClobber : Slot_Register); } - /// Returns the index of the DEF slot for the instruction pointed to by - /// this index. - SlotIndex getDefIndex() const { - return SlotIndex(&entry(), SlotIndex::DEF); + /// Returns the dead def kill slot for the current instruction. 
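// [Editorial sketch] Approximate mapping between the removed and the newly
// introduced SlotIndex accessors in this hunk (new slot in parentheses):
//   getLoadIndex()  -> getBaseIndex()        (Slot_Block)
//   getUseIndex()   -> getRegSlot()          (Slot_Register)
//   getDefIndex()   -> getRegSlot()          (Slot_Register), or
//                      getRegSlot(true)      (Slot_EarlyClobber) for
//                                            early-clobber defs
//   getStoreIndex() -> getBoundaryIndex() or getDeadSlot()   (Slot_Dead)
// e.g.  SlotIndex DefIdx = Indexes->getInstructionIndex(MI).getRegSlot();
// where "Indexes" is a hypothetical SlotIndexes* and "MI" a MachineInstr*.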
+ SlotIndex getDeadSlot() const { + return SlotIndex(&entry(), Slot_Dead); } - /// Returns the index of the STORE slot for the instruction pointed to by - /// this index. - SlotIndex getStoreIndex() const { - return SlotIndex(&entry(), SlotIndex::STORE); - } - /// Returns the next slot in the index list. This could be either the /// next slot for the instruction pointed to by this index or, if this /// index is a STORE, the first slot for the next instruction. @@ -257,8 +265,8 @@ namespace llvm { /// use one of those methods. SlotIndex getNextSlot() const { Slot s = getSlot(); - if (s == SlotIndex::STORE) { - return SlotIndex(entry().getNext(), SlotIndex::LOAD); + if (s == Slot_Dead) { + return SlotIndex(entry().getNext(), Slot_Block); } return SlotIndex(&entry(), s + 1); } @@ -271,14 +279,14 @@ namespace llvm { /// Returns the previous slot in the index list. This could be either the /// previous slot for the instruction pointed to by this index or, if this - /// index is a LOAD, the last slot for the previous instruction. + /// index is a Slot_Block, the last slot for the previous instruction. /// WARNING: This method is considerably more expensive than the methods /// that return specific slots (getUseIndex(), etc). If you can - please /// use one of those methods. SlotIndex getPrevSlot() const { Slot s = getSlot(); - if (s == SlotIndex::LOAD) { - return SlotIndex(entry().getPrev(), SlotIndex::STORE); + if (s == Slot_Block) { + return SlotIndex(entry().getPrev(), Slot_Dead); } return SlotIndex(&entry(), s - 1); } @@ -464,11 +472,6 @@ namespace llvm { return SlotIndex(back(), 0); } - /// Returns the invalid index marker for this analysis. - SlotIndex getInvalidIndex() { - return getZeroIndex(); - } - /// Returns the distance between the highest and lowest indexes allocated /// so far. unsigned getIndexesLength() const { @@ -486,12 +489,13 @@ namespace llvm { /// Returns true if the given machine instr is mapped to an index, /// otherwise returns false. bool hasIndex(const MachineInstr *instr) const { - return (mi2iMap.find(instr) != mi2iMap.end()); + return mi2iMap.count(instr); } /// Returns the base index for the given instruction. - SlotIndex getInstructionIndex(const MachineInstr *instr) const { - Mi2IndexMap::const_iterator itr = mi2iMap.find(instr); + SlotIndex getInstructionIndex(const MachineInstr *MI) const { + // Instructions inside a bundle have the same number as the bundle itself. + Mi2IndexMap::const_iterator itr = mi2iMap.find(getBundleStart(MI)); assert(itr != mi2iMap.end() && "Instruction not found in maps."); return itr->second; } @@ -645,6 +649,8 @@ namespace llvm { /// instructions, create the new index after the null indexes instead of /// before them. SlotIndex insertMachineInstrInMaps(MachineInstr *mi, bool Late = false) { + assert(!mi->isInsideBundle() && + "Instructions inside bundles should use bundle start's slot."); assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed."); // Numbering DBG_VALUE instructions could cause code generation to be // affected by debug information. 
@@ -677,7 +683,7 @@ namespace llvm { if (dist == 0) renumberIndexes(newEntry); - SlotIndex newIndex(newEntry, SlotIndex::LOAD); + SlotIndex newIndex(newEntry, SlotIndex::Slot_Block); mi2iMap.insert(std::make_pair(mi, newIndex)); return newIndex; } @@ -728,8 +734,8 @@ namespace llvm { insert(nextEntry, startEntry); insert(nextEntry, stopEntry); - SlotIndex startIdx(startEntry, SlotIndex::LOAD); - SlotIndex endIdx(nextEntry, SlotIndex::LOAD); + SlotIndex startIdx(startEntry, SlotIndex::Slot_Block); + SlotIndex endIdx(nextEntry, SlotIndex::Slot_Block); assert(unsigned(mbb->getNumber()) == MBBRanges.size() && "Blocks must be added in order"); diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index ca40ccf85378..5a4213625bae 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/SectionKind.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/ADT/StringRef.h" namespace llvm { class MachineModuleInfo; @@ -65,6 +65,11 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { virtual MCSymbol * getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const; + + virtual const MCSection * + getStaticCtorSection(unsigned Priority = 65535) const; + virtual const MCSection * + getStaticDtorSection(unsigned Priority = 65535) const; }; @@ -73,6 +78,12 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile { public: virtual ~TargetLoweringObjectFileMachO() {} + /// emitModuleFlags - Emit the module flags that specify the garbage + /// collection information. 
+ virtual void emitModuleFlags(MCStreamer &Streamer, + ArrayRef ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const; + virtual const MCSection * SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const; diff --git a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h index cae0bcb165c1..76c2357a552f 100644 --- a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h @@ -16,10 +16,11 @@ #ifndef LLVM_CODEGEN_VALUETYPES_H #define LLVM_CODEGEN_VALUETYPES_H +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include #include -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/MathExtras.h" namespace llvm { class Type; @@ -45,49 +46,56 @@ namespace llvm { FIRST_INTEGER_VALUETYPE = i1, LAST_INTEGER_VALUETYPE = i128, - f32 = 7, // This is a 32 bit floating point value - f64 = 8, // This is a 64 bit floating point value - f80 = 9, // This is a 80 bit floating point value - f128 = 10, // This is a 128 bit floating point value - ppcf128 = 11, // This is a PPC 128-bit floating point value + f16 = 7, // This is a 16 bit floating point value + f32 = 8, // This is a 32 bit floating point value + f64 = 9, // This is a 64 bit floating point value + f80 = 10, // This is a 80 bit floating point value + f128 = 11, // This is a 128 bit floating point value + ppcf128 = 12, // This is a PPC 128-bit floating point value - v2i8 = 12, // 2 x i8 - v4i8 = 13, // 4 x i8 - v8i8 = 14, // 8 x i8 - v16i8 = 15, // 16 x i8 - v32i8 = 16, // 32 x i8 - v2i16 = 17, // 2 x i16 - v4i16 = 18, // 4 x i16 - v8i16 = 19, // 8 x i16 - v16i16 = 20, // 16 x i16 - v2i32 = 21, // 2 x i32 - v4i32 = 22, // 4 x i32 - v8i32 = 23, // 8 x i32 - v1i64 = 24, // 1 x i64 - v2i64 = 25, // 2 x i64 - v4i64 = 26, // 4 x i64 - v8i64 = 27, // 8 x i64 + FIRST_FP_VALUETYPE = f16, + LAST_FP_VALUETYPE = ppcf128, - v2f32 = 28, // 2 x f32 - v4f32 = 29, // 4 x f32 - v8f32 = 30, // 8 x f32 - v2f64 = 31, // 2 x f64 - v4f64 = 32, // 4 x f64 + v2i8 = 13, // 2 x i8 + v4i8 = 14, // 4 x i8 + v8i8 = 15, // 8 x i8 + v16i8 = 16, // 16 x i8 + v32i8 = 17, // 32 x i8 + v2i16 = 18, // 2 x i16 + v4i16 = 19, // 4 x i16 + v8i16 = 20, // 8 x i16 + v16i16 = 21, // 16 x i16 + v2i32 = 22, // 2 x i32 + v4i32 = 23, // 4 x i32 + v8i32 = 24, // 8 x i32 + v1i64 = 25, // 1 x i64 + v2i64 = 26, // 2 x i64 + v4i64 = 27, // 4 x i64 + v8i64 = 28, // 8 x i64 + + v2f16 = 29, // 2 x f16 + v2f32 = 30, // 2 x f32 + v4f32 = 31, // 4 x f32 + v8f32 = 32, // 8 x f32 + v2f64 = 33, // 2 x f64 + v4f64 = 34, // 4 x f64 FIRST_VECTOR_VALUETYPE = v2i8, LAST_VECTOR_VALUETYPE = v4f64, + FIRST_FP_VECTOR_VALUETYPE = v2f16, + LAST_FP_VECTOR_VALUETYPE = v4f64, - x86mmx = 33, // This is an X86 MMX value + x86mmx = 35, // This is an X86 MMX value - Glue = 34, // This glues nodes together during pre-RA sched + Glue = 36, // This glues nodes together during pre-RA sched - isVoid = 35, // This has no value + isVoid = 37, // This has no value - untyped = 36, // This value takes a register, but has + Untyped = 38, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - LAST_VALUETYPE = 37, // This always remains at the end of the list. + LAST_VALUETYPE = 39, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. 
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -143,8 +151,10 @@ namespace llvm { /// isFloatingPoint - Return true if this is a FP, or a vector FP type. bool isFloatingPoint() const { - return ((SimpleTy >= MVT::f32 && SimpleTy <= MVT::ppcf128) || - (SimpleTy >= MVT::v2f32 && SimpleTy <= MVT::v4f64)); + return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE && + SimpleTy <= MVT::LAST_FP_VALUETYPE) || + (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE)); } /// isInteger - Return true if this is an integer, or a vector integer type. @@ -203,6 +213,7 @@ namespace llvm { case v2i64: case v4i64: case v8i64: return i64; + case v2f16: return f16; case v2f32: case v4f32: case v8f32: return f32; @@ -233,6 +244,7 @@ namespace llvm { case v2i16: case v2i32: case v2i64: + case v2f16: case v2f32: case v2f64: return 2; case v1i64: return 1; @@ -242,21 +254,23 @@ namespace llvm { unsigned getSizeInBits() const { switch (SimpleTy) { case iPTR: - assert(0 && "Value type size is target-dependent. Ask TLI."); + llvm_unreachable("Value type size is target-dependent. Ask TLI."); case iPTRAny: case iAny: case fAny: - assert(0 && "Value type is overloaded."); + llvm_unreachable("Value type is overloaded."); default: - assert(0 && "getSizeInBits called on extended MVT."); + llvm_unreachable("getSizeInBits called on extended MVT."); case i1 : return 1; case i8 : return 8; case i16 : + case f16: case v2i8: return 16; case f32 : case i32 : case v4i8: - case v2i16: return 32; + case v2i16: + case v2f16: return 32; case x86mmx: case f64 : case i64 : @@ -300,7 +314,9 @@ namespace llvm { static MVT getFloatingPointVT(unsigned BitWidth) { switch (BitWidth) { default: - assert(false && "Bad bit width!"); + llvm_unreachable("Bad bit width!"); + case 16: + return MVT::f16; case 32: return MVT::f32; case 64: @@ -359,6 +375,9 @@ namespace llvm { if (NumElements == 4) return MVT::v4i64; if (NumElements == 8) return MVT::v8i64; break; + case MVT::f16: + if (NumElements == 2) return MVT::v2f16; + break; case MVT::f32: if (NumElements == 2) return MVT::v2f32; if (NumElements == 4) return MVT::v4f32; @@ -424,20 +443,6 @@ namespace llvm { return getExtendedVectorVT(Context, VT, NumElements); } - /// getIntVectorWithNumElements - Return any integer vector type that has - /// the specified number of elements. - static EVT getIntVectorWithNumElements(LLVMContext &C, unsigned NumElts) { - switch (NumElts) { - default: return getVectorVT(C, MVT::i8, NumElts); - case 1: return MVT::v1i64; - case 2: return MVT::v2i32; - case 4: return MVT::v4i16; - case 8: return MVT::v8i8; - case 16: return MVT::v16i8; - } - return MVT::INVALID_SIMPLE_VALUE_TYPE; - } - /// changeVectorElementTypeToInteger - Return a vector with the same number /// of elements as this vector, but with the element type converted to an /// integer type with the same bitwidth. 
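// [Editorial sketch] The new f16/v2f16 types above shift every subsequent MVT
// enumerator, which is why ValueTypes.td below is renumbered in lockstep.
// Illustrative use of the new types; "Ctx" is a hypothetical LLVMContext&:
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
static void halfExample(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  EVT Half  = MVT::f16;                              // 16-bit float
  EVT VHalf = EVT::getVectorVT(Ctx, MVT::f16, 2);    // yields MVT::v2f16
  assert(Half.getSizeInBits() == 16 && VHalf.getSizeInBits() == 32);
  (void)Half; (void)VHalf;
}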
diff --git a/contrib/llvm/include/llvm/CodeGen/ValueTypes.td b/contrib/llvm/include/llvm/CodeGen/ValueTypes.td index 0cfb634ead79..6c2269052a11 100644 --- a/contrib/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/contrib/llvm/include/llvm/CodeGen/ValueTypes.td @@ -26,39 +26,41 @@ def i16 : ValueType<16 , 3>; // 16-bit integer value def i32 : ValueType<32 , 4>; // 32-bit integer value def i64 : ValueType<64 , 5>; // 64-bit integer value def i128 : ValueType<128, 6>; // 128-bit integer value -def f32 : ValueType<32 , 7>; // 32-bit floating point value -def f64 : ValueType<64 , 8>; // 64-bit floating point value -def f80 : ValueType<80 , 9>; // 80-bit floating point value -def f128 : ValueType<128, 10>; // 128-bit floating point value -def ppcf128: ValueType<128, 11>; // PPC 128-bit floating point value +def f16 : ValueType<16 , 7>; // 32-bit floating point value +def f32 : ValueType<32 , 8>; // 32-bit floating point value +def f64 : ValueType<64 , 9>; // 64-bit floating point value +def f80 : ValueType<80 , 10>; // 80-bit floating point value +def f128 : ValueType<128, 11>; // 128-bit floating point value +def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value -def v2i8 : ValueType<16 , 12>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 13>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 14>; // 8 x i8 vector value -def v16i8 : ValueType<128, 15>; // 16 x i8 vector value -def v32i8 : ValueType<256, 16>; // 32 x i8 vector value -def v2i16 : ValueType<32 , 17>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 18>; // 4 x i16 vector value -def v8i16 : ValueType<128, 19>; // 8 x i16 vector value -def v16i16 : ValueType<256, 20>; // 16 x i16 vector value -def v2i32 : ValueType<64 , 21>; // 2 x i32 vector value -def v4i32 : ValueType<128, 22>; // 4 x i32 vector value -def v8i32 : ValueType<256, 23>; // 8 x i32 vector value -def v1i64 : ValueType<64 , 24>; // 1 x i64 vector value -def v2i64 : ValueType<128, 25>; // 2 x i64 vector value -def v4i64 : ValueType<256, 26>; // 4 x i64 vector value -def v8i64 : ValueType<512, 27>; // 8 x i64 vector value +def v2i8 : ValueType<16 , 13>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 14>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 15>; // 8 x i8 vector value +def v16i8 : ValueType<128, 16>; // 16 x i8 vector value +def v32i8 : ValueType<256, 17>; // 32 x i8 vector value +def v2i16 : ValueType<32 , 18>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 19>; // 4 x i16 vector value +def v8i16 : ValueType<128, 20>; // 8 x i16 vector value +def v16i16 : ValueType<256, 21>; // 16 x i16 vector value +def v2i32 : ValueType<64 , 22>; // 2 x i32 vector value +def v4i32 : ValueType<128, 23>; // 4 x i32 vector value +def v8i32 : ValueType<256, 24>; // 8 x i32 vector value +def v1i64 : ValueType<64 , 25>; // 1 x i64 vector value +def v2i64 : ValueType<128, 26>; // 2 x i64 vector value +def v4i64 : ValueType<256, 27>; // 4 x i64 vector value +def v8i64 : ValueType<512, 28>; // 8 x i64 vector value -def v2f32 : ValueType<64 , 28>; // 2 x f32 vector value -def v4f32 : ValueType<128, 29>; // 4 x f32 vector value -def v8f32 : ValueType<256, 30>; // 8 x f32 vector value -def v2f64 : ValueType<128, 31>; // 2 x f64 vector value -def v4f64 : ValueType<256, 32>; // 4 x f64 vector value +def v2f16 : ValueType<32 , 29>; // 2 x f16 vector value +def v2f32 : ValueType<64 , 30>; // 2 x f32 vector value +def v4f32 : ValueType<128, 31>; // 4 x f32 vector value +def v8f32 : ValueType<256, 32>; // 8 x f32 vector value +def v2f64 : ValueType<128, 33>; // 
2 x f64 vector value +def v4f64 : ValueType<256, 34>; // 4 x f64 vector value -def x86mmx : ValueType<64 , 33>; // X86 MMX value -def FlagVT : ValueType<0 , 34>; // Pre-RA sched glue -def isVoid : ValueType<0 , 35>; // Produces no value -def untyped: ValueType<8 , 36>; // Produces an untyped value +def x86mmx : ValueType<64 , 35>; // X86 MMX value +def FlagVT : ValueType<0 , 36>; // Pre-RA sched glue +def isVoid : ValueType<0 , 37>; // Produces no value +def untyped: ValueType<8 , 38>; // Produces an untyped value def MetadataVT: ValueType<0, 250>; // Metadata diff --git a/contrib/llvm/include/llvm/Constant.h b/contrib/llvm/include/llvm/Constant.h index ecc1fe70cc5d..13acdc66b892 100644 --- a/contrib/llvm/include/llvm/Constant.h +++ b/contrib/llvm/include/llvm/Constant.h @@ -41,6 +41,7 @@ namespace llvm { class Constant : public User { void operator=(const Constant &); // Do not implement Constant(const Constant &); // Do not implement + virtual void anchor(); protected: Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps) @@ -90,12 +91,13 @@ class Constant : public User { /// FIXME: This really should not be in VMCore. PossibleRelocationsTy getRelocationInfo() const; - /// getVectorElements - This method, which is only valid on constant of vector - /// type, returns the elements of the vector in the specified smallvector. - /// This handles breaking down a vector undef into undef elements, etc. For - /// constant exprs and other cases we can't handle, we return an empty vector. - void getVectorElements(SmallVectorImpl &Elts) const; - + /// getAggregateElement - For aggregates (struct/array/vector) return the + /// constant that corresponds to the specified element if possible, or null if + /// not. This can return null if the element index is a ConstantExpr, or if + /// 'this' is a constant expr. + Constant *getAggregateElement(unsigned Elt) const; + Constant *getAggregateElement(Constant *Elt) const; + /// destroyConstant - Called if some element of this constant is no longer /// valid. At this point only other constants may be on the use_list for this /// constant. Any constants on our Use list must also be destroy'd. The @@ -103,7 +105,7 @@ class Constant : public User { /// available cached constants. Implementations should call /// destroyConstantImpl as the last thing they do, to destroy all users and /// delete this. - virtual void destroyConstant() { assert(0 && "Not reached!"); } + virtual void destroyConstant() { llvm_unreachable("Not reached!"); } //// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Constant *) { return true; } @@ -129,11 +131,12 @@ class Constant : public User { // to be here to avoid link errors. assert(getNumOperands() == 0 && "replaceUsesOfWithOnConstant must be " "implemented for all constants that have operands!"); - assert(0 && "Constants that do not have operands cannot be using 'From'!"); + llvm_unreachable("Constants that do not have operands cannot be using " + "'From'!"); } - + static Constant *getNullValue(Type* Ty); - + /// @returns the value for an integer constant of the given type that has all /// its bits set to true. 
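// [Editorial sketch] Constant::getAggregateElement(), added in the Constant.h
// hunk above, replaces the removed getVectorElements() pattern and also works
// for structs and arrays. It may return null (e.g. for constant expressions),
// so callers must check. "CV" is a hypothetical vector Constant* and "NumElts"
// its element count:
//   for (unsigned i = 0; i != NumElts; ++i)
//     if (llvm::Constant *Elt = CV->getAggregateElement(i))
//       ; // use Elt, typically a ConstantInt/ConstantFP/undef/zero element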
/// @brief Get the all ones value diff --git a/contrib/llvm/include/llvm/Constants.h b/contrib/llvm/include/llvm/Constants.h index 6545a3fedb92..0abe17d365d4 100644 --- a/contrib/llvm/include/llvm/Constants.h +++ b/contrib/llvm/include/llvm/Constants.h @@ -34,10 +34,13 @@ class IntegerType; class StructType; class PointerType; class VectorType; +class SequentialType; template struct ConstantCreator; template +struct ConstantArrayCreator; +template struct ConvertConstantType; //===----------------------------------------------------------------------===// @@ -45,6 +48,7 @@ struct ConvertConstantType; /// represents both boolean and integral constants. /// @brief Class for constant integers. class ConstantInt : public Constant { + virtual void anchor(); void *operator new(size_t, unsigned); // DO NOT IMPLEMENT ConstantInt(const ConstantInt &); // DO NOT IMPLEMENT ConstantInt(IntegerType *Ty, const APInt& V); @@ -229,6 +233,7 @@ class ConstantInt : public Constant { /// class ConstantFP : public Constant { APFloat Val; + virtual void anchor(); void *operator new(size_t, unsigned);// DO NOT IMPLEMENT ConstantFP(const ConstantFP &); // DO NOT IMPLEMENT friend class LLVMContextImpl; @@ -296,7 +301,6 @@ class ConstantFP : public Constant { /// ConstantAggregateZero - All zero aggregate value /// class ConstantAggregateZero : public Constant { - friend struct ConstantCreator; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT ConstantAggregateZero(const ConstantAggregateZero &); // DO NOT IMPLEMENT protected: @@ -308,10 +312,26 @@ class ConstantAggregateZero : public Constant { return User::operator new(s, 0); } public: - static ConstantAggregateZero* get(Type *Ty); + static ConstantAggregateZero *get(Type *Ty); virtual void destroyConstant(); + /// getSequentialElement - If this CAZ has array or vector type, return a zero + /// with the right element type. + Constant *getSequentialElement() const; + + /// getStructElement - If this CAZ has struct type, return a zero with the + /// right element type for the specified element. + Constant *getStructElement(unsigned Elt) const; + + /// getElementValue - Return a zero of the right value for the specified GEP + /// index. + Constant *getElementValue(Constant *C) const; + + /// getElementValue - Return a zero of the right value for the specified GEP + /// index. + Constant *getElementValue(unsigned Idx) const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: /// static bool classof(const ConstantAggregateZero *) { return true; } @@ -325,8 +345,7 @@ class ConstantAggregateZero : public Constant { /// ConstantArray - Constant Array Declarations /// class ConstantArray : public Constant { - friend struct ConstantCreator >; + friend struct ConstantArrayCreator; ConstantArray(const ConstantArray &); // DO NOT IMPLEMENT protected: ConstantArray(ArrayType *T, ArrayRef Val); @@ -334,15 +353,6 @@ class ConstantArray : public Constant { // ConstantArray accessors static Constant *get(ArrayType *T, ArrayRef V); - /// This method constructs a ConstantArray and initializes it with a text - /// string. The default behavior (AddNull==true) causes a null terminator to - /// be placed at the end of the array. This effectively increases the length - /// of the array by one (you've been warned). However, in some situations - /// this is not desired so if AddNull==false then the string is copied without - /// null termination. 
- static Constant *get(LLVMContext &Context, StringRef Initializer, - bool AddNull = true); - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -353,28 +363,6 @@ class ConstantArray : public Constant { return reinterpret_cast(Value::getType()); } - /// isString - This method returns true if the array is an array of i8 and - /// the elements of the array are all ConstantInt's. - bool isString() const; - - /// isCString - This method returns true if the array is a string (see - /// @verbatim - /// isString) and it ends in a null byte \0 and does not contains any other - /// @endverbatim - /// null bytes except its terminator. - bool isCString() const; - - /// getAsString - If this array is isString(), then this method converts the - /// array to an std::string and returns it. Otherwise, it asserts out. - /// - std::string getAsString() const; - - /// getAsCString - If this array is isCString(), then this method converts the - /// array (without the trailing null byte) to an std::string and returns it. - /// Otherwise, it asserts out. - /// - std::string getAsCString() const; - virtual void destroyConstant(); virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); @@ -396,8 +384,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantArray, Constant) // ConstantStruct - Constant Struct Declarations // class ConstantStruct : public Constant { - friend struct ConstantCreator >; + friend struct ConstantArrayCreator; ConstantStruct(const ConstantStruct &); // DO NOT IMPLEMENT protected: ConstantStruct(StructType *T, ArrayRef Val); @@ -457,8 +444,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantStruct, Constant) /// ConstantVector - Constant Vector Declarations /// class ConstantVector : public Constant { - friend struct ConstantCreator >; + friend struct ConstantArrayCreator; ConstantVector(const ConstantVector &); // DO NOT IMPLEMENT protected: ConstantVector(VectorType *T, ArrayRef Val); @@ -466,6 +452,10 @@ class ConstantVector : public Constant { // ConstantVector accessors static Constant *get(ArrayRef V); + /// getSplat - Return a ConstantVector with the specified constant in each + /// element. + static Constant *getSplat(unsigned NumElts, Constant *Elt); + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -475,12 +465,6 @@ class ConstantVector : public Constant { inline VectorType *getType() const { return reinterpret_cast(Value::getType()); } - - /// This function will return true iff every element in this vector constant - /// is set to all ones. - /// @returns true iff this constant's emements are all set to all ones. - /// @brief Determine if the value is all ones. - bool isAllOnesValue() const; /// getSplatValue - If this is a splat constant, meaning that all of the /// elements have the same value, return that value. Otherwise return NULL. 
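// [Editorial sketch] ConstantVector::getSplat(), added above, builds a vector
// constant whose elements are all the same value. "Ctx" is a hypothetical
// LLVMContext& supplied by the caller:
#include "llvm/Constants.h"
#include "llvm/Type.h"
static llvm::Constant *makeOnes(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Constant *One = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  return ConstantVector::getSplat(4, One); // <4 x i32> <i32 1, i32 1, i32 1, i32 1>
}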
@@ -507,7 +491,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantVector, Constant) /// ConstantPointerNull - a constant pointer value that points to null /// class ConstantPointerNull : public Constant { - friend struct ConstantCreator; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT ConstantPointerNull(const ConstantPointerNull &); // DO NOT IMPLEMENT protected: @@ -539,6 +522,240 @@ class ConstantPointerNull : public Constant { return V->getValueID() == ConstantPointerNullVal; } }; + +//===----------------------------------------------------------------------===// +/// ConstantDataSequential - A vector or array constant whose element type is a +/// simple 1/2/4/8-byte integer or float/double, and whose elements are just +/// simple data values (i.e. ConstantInt/ConstantFP). This Constant node has no +/// operands because it stores all of the elements of the constant as densely +/// packed data, instead of as Value*'s. +/// +/// This is the common base class of ConstantDataArray and ConstantDataVector. +/// +class ConstantDataSequential : public Constant { + friend class LLVMContextImpl; + /// DataElements - A pointer to the bytes underlying this constant (which is + /// owned by the uniquing StringMap). + const char *DataElements; + + /// Next - This forms a link list of ConstantDataSequential nodes that have + /// the same value but different type. For example, 0,0,0,1 could be a 4 + /// element array of i8, or a 1-element array of i32. They'll both end up in + /// the same StringMap bucket, linked up. + ConstantDataSequential *Next; + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantDataSequential(const ConstantDataSequential &); // DO NOT IMPLEMENT +protected: + explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data) + : Constant(ty, VT, 0, 0), DataElements(Data), Next(0) {} + ~ConstantDataSequential() { delete Next; } + + static Constant *getImpl(StringRef Bytes, Type *Ty); + +protected: + // allocate space for exactly zero operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + + /// isElementTypeCompatible - Return true if a ConstantDataSequential can be + /// formed with a vector or array of the specified element type. + /// ConstantDataArray only works with normal float and int types that are + /// stored densely in memory, not with things like i42 or x86_f80. + static bool isElementTypeCompatible(const Type *Ty); + + /// getElementAsInteger - If this is a sequential container of integers (of + /// any size), return the specified element in the low bits of a uint64_t. + uint64_t getElementAsInteger(unsigned i) const; + + /// getElementAsAPFloat - If this is a sequential container of floating point + /// type, return the specified element as an APFloat. + APFloat getElementAsAPFloat(unsigned i) const; + + /// getElementAsFloat - If this is an sequential container of floats, return + /// the specified element as a float. + float getElementAsFloat(unsigned i) const; + + /// getElementAsDouble - If this is an sequential container of doubles, return + /// the specified element as a double. + double getElementAsDouble(unsigned i) const; + + /// getElementAsConstant - Return a Constant for a specified index's element. + /// Note that this has to compute a new constant to return, so it isn't as + /// efficient as getElementAsInteger/Float/Double. 
+ Constant *getElementAsConstant(unsigned i) const; + + /// getType - Specialize the getType() method to always return a + /// SequentialType, which reduces the amount of casting needed in parts of the + /// compiler. + inline SequentialType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// getElementType - Return the element type of the array/vector. + Type *getElementType() const; + + /// getNumElements - Return the number of elements in the array or vector. + unsigned getNumElements() const; + + /// getElementByteSize - Return the size (in bytes) of each element in the + /// array/vector. The size of the elements is known to be a multiple of one + /// byte. + uint64_t getElementByteSize() const; + + + /// isString - This method returns true if this is an array of i8. + bool isString() const; + + /// isCString - This method returns true if the array "isString", ends with a + /// nul byte, and does not contains any other nul bytes. + bool isCString() const; + + /// getAsString - If this array is isString(), then this method returns the + /// array as a StringRef. Otherwise, it asserts out. + /// + StringRef getAsString() const { + assert(isString() && "Not a string"); + return getRawDataValues(); + } + + /// getAsCString - If this array is isCString(), then this method returns the + /// array (without the trailing null byte) as a StringRef. Otherwise, it + /// asserts out. + /// + StringRef getAsCString() const { + assert(isCString() && "Isn't a C string"); + StringRef Str = getAsString(); + return Str.substr(0, Str.size()-1); + } + + /// getRawDataValues - Return the raw, underlying, bytes of this data. Note + /// that this is an extremely tricky thing to work with, as it exposes the + /// host endianness of the data elements. + StringRef getRawDataValues() const; + + virtual void destroyConstant(); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantDataSequential *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantDataArrayVal || + V->getValueID() == ConstantDataVectorVal; + } +private: + const char *getElementPointer(unsigned Elt) const; +}; + +//===----------------------------------------------------------------------===// +/// ConstantDataArray - An array constant whose element type is a simple +/// 1/2/4/8-byte integer or float/double, and whose elements are just simple +/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no +/// operands because it stores all of the elements of the constant as densely +/// packed data, instead of as Value*'s. +class ConstantDataArray : public ConstantDataSequential { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantDataArray(const ConstantDataArray &); // DO NOT IMPLEMENT + virtual void anchor(); + friend class ConstantDataSequential; + explicit ConstantDataArray(Type *ty, const char *Data) + : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {} +protected: + // allocate space for exactly zero operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + + /// get() constructors - Return a constant with array type with an element + /// count and element type matching the ArrayRef passed in. Note that this + /// can return a ConstantAggregateZero object. 
+ static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + + /// getString - This method constructs a CDS and initializes it with a text + /// string. The default behavior (AddNull==true) causes a null terminator to + /// be placed at the end of the array (increasing the length of the string by + /// one more than the StringRef would normally indicate. Pass AddNull=false + /// to disable this behavior. + static Constant *getString(LLVMContext &Context, StringRef Initializer, + bool AddNull = true); + + /// getType - Specialize the getType() method to always return an ArrayType, + /// which reduces the amount of casting needed in parts of the compiler. + /// + inline ArrayType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantDataArray *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantDataArrayVal; + } +}; + +//===----------------------------------------------------------------------===// +/// ConstantDataVector - A vector constant whose element type is a simple +/// 1/2/4/8-byte integer or float/double, and whose elements are just simple +/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no +/// operands because it stores all of the elements of the constant as densely +/// packed data, instead of as Value*'s. +class ConstantDataVector : public ConstantDataSequential { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantDataVector(const ConstantDataVector &); // DO NOT IMPLEMENT + virtual void anchor(); + friend class ConstantDataSequential; + explicit ConstantDataVector(Type *ty, const char *Data) + : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {} +protected: + // allocate space for exactly zero operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + + /// get() constructors - Return a constant with vector type with an element + /// count and element type matching the ArrayRef passed in. Note that this + /// can return a ConstantAggregateZero object. + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + static Constant *get(LLVMContext &Context, ArrayRef Elts); + + /// getSplat - Return a ConstantVector with the specified constant in each + /// element. The specified constant has to be a of a compatible type (i8/i16/ + /// i32/i64/float/double) and must be a ConstantFP or ConstantInt. + static Constant *getSplat(unsigned NumElts, Constant *Elt); + + /// getSplatValue - If this is a splat constant, meaning that all of the + /// elements have the same value, return that value. Otherwise return NULL. + Constant *getSplatValue() const; + + /// getType - Specialize the getType() method to always return a VectorType, + /// which reduces the amount of casting needed in parts of the compiler. 
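// [Editorial sketch] ConstantDataArray/ConstantDataVector, declared above, store
// simple int/float elements as densely packed data rather than as individual
// Value* operands. Hypothetical usage; "Ctx" is an LLVMContext&:
#include "llvm/Constants.h"
#include "llvm/Type.h"
static void cdsExample(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Constant *Str = ConstantDataArray::getString(Ctx, "hi");  // [3 x i8] c"hi\00"
  Constant *Elt = ConstantInt::get(Type::getInt32Ty(Ctx), 7);
  Constant *Vec = ConstantDataVector::getSplat(4, Elt);     // <4 x i32> of 7s
  (void)Str; (void)Vec;
}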
+ /// + inline VectorType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantDataVector *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantDataVectorVal; + } +}; + + /// BlockAddress - The address of a basic block. /// @@ -897,7 +1114,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant) /// LangRef.html#undefvalues for details. /// class UndefValue : public Constant { - friend struct ConstantCreator; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT UndefValue(const UndefValue &); // DO NOT IMPLEMENT protected: @@ -913,6 +1129,22 @@ class UndefValue : public Constant { /// static UndefValue *get(Type *T); + /// getSequentialElement - If this Undef has array or vector type, return a + /// undef with the right element type. + UndefValue *getSequentialElement() const; + + /// getStructElement - If this undef has struct type, return a undef with the + /// right element type for the specified element. + UndefValue *getStructElement(unsigned Elt) const; + + /// getElementValue - Return an undef of the right value for the specified GEP + /// index. + UndefValue *getElementValue(Constant *C) const; + + /// getElementValue - Return an undef of the right value for the specified GEP + /// index. + UndefValue *getElementValue(unsigned Idx) const; + virtual void destroyConstant(); /// Methods for support type inquiry through isa, cast, and dyn_cast: diff --git a/contrib/llvm/include/llvm/DebugInfoProbe.h b/contrib/llvm/include/llvm/DebugInfoProbe.h deleted file mode 100644 index 78d00dfeeddf..000000000000 --- a/contrib/llvm/include/llvm/DebugInfoProbe.h +++ /dev/null @@ -1,67 +0,0 @@ -//===-- DebugInfoProbe.h - DebugInfo Probe ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a probe, DebugInfoProbe, that can be used by pass -// manager to analyze how optimizer is treating debugging information. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H -#define LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H - -#include "llvm/ADT/StringMap.h" - -namespace llvm { - class Function; - class Pass; - class DebugInfoProbeImpl; - - /// DebugInfoProbe - This class provides a interface to monitor - /// how an optimization pass is preserving debugging information. - class DebugInfoProbe { - public: - DebugInfoProbe(); - ~DebugInfoProbe(); - - /// initialize - Collect information before running an optimization pass. - void initialize(StringRef PName, Function &F); - - /// finalize - Collect information after running an optimization pass. This - /// must be used after initialization. - void finalize(Function &F); - - /// report - Report findings. This should be invoked after finalize. - void report(); - - private: - DebugInfoProbeImpl *pImpl; - }; - - /// DebugInfoProbeInfo - This class provides an interface that a pass manager - /// can use to manage debug info probes. - class DebugInfoProbeInfo { - StringMap Probes; - public: - DebugInfoProbeInfo() {} - - /// ~DebugInfoProbeInfo - Report data collected by all probes before deleting - /// them. 
- ~DebugInfoProbeInfo(); - - /// initialize - Collect information before running an optimization pass. - void initialize(Pass *P, Function &F); - - /// finalize - Collect information after running an optimization pass. This - /// must be used after initialization. - void finalize(Pass *P, Function &F); - }; - -} // End llvm namespace - -#endif diff --git a/contrib/llvm/include/llvm/DefaultPasses.h b/contrib/llvm/include/llvm/DefaultPasses.h index 2e4145b5ebbe..929569d543d9 100644 --- a/contrib/llvm/include/llvm/DefaultPasses.h +++ b/contrib/llvm/include/llvm/DefaultPasses.h @@ -14,6 +14,8 @@ #ifndef LLVM_DEFAULT_PASS_SUPPORT_H #define LLVM_DEFAULT_PASS_SUPPORT_H +#include + namespace llvm { class PassManagerBase; diff --git a/contrib/llvm/include/llvm/DerivedTypes.h b/contrib/llvm/include/llvm/DerivedTypes.h index 445c3deb7ce2..da5ad27b1f1c 100644 --- a/contrib/llvm/include/llvm/DerivedTypes.h +++ b/contrib/llvm/include/llvm/DerivedTypes.h @@ -195,9 +195,10 @@ class StructType : public CompositeType { // This is the contents of the SubClassData field. SCDB_HasBody = 1, SCDB_Packed = 2, - SCDB_IsLiteral = 4 + SCDB_IsLiteral = 4, + SCDB_IsSized = 8 }; - + /// SymbolTableEntry - For a named struct that actually has a name, this is a /// pointer to the symbol table entry (maintained by LLVMContext) for the /// struct. This is null if the type is an literal struct or if it is @@ -248,6 +249,9 @@ class StructType : public CompositeType { /// isOpaque - Return true if this is a type with an identity that has no body /// specified yet. These prints as 'opaque' in .ll files. bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; } + + /// isSized - Return true if this is a sized type. + bool isSized() const; /// hasName - Return true if this is a named struct that has a non-empty name. bool hasName() const { return SymbolTableEntry != 0; } @@ -374,6 +378,7 @@ class VectorType : public SequentialType { /// static VectorType *getInteger(VectorType *VTy) { unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + assert(EltBits && "Element size must be of a non-zero size"); Type *EltTy = IntegerType::get(VTy->getContext(), EltBits); return VectorType::get(EltTy, VTy->getNumElements()); } @@ -408,6 +413,7 @@ class VectorType : public SequentialType { unsigned getNumElements() const { return NumElements; } /// @brief Return the number of bits in the Vector type. + /// Returns zero when the vector is a vector of pointers. 
unsigned getBitWidth() const { return NumElements * getElementType()->getPrimitiveSizeInBits(); } diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h index cf85671eb414..e920e98a0bf6 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -15,17 +15,19 @@ #ifndef LLVM_EXECUTION_ENGINE_H #define LLVM_EXECUTION_ENGINE_H -#include -#include -#include #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ValueMap.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include +#include +#include namespace llvm { @@ -41,6 +43,7 @@ class MachineCodeInfo; class Module; class MutexGuard; class TargetData; +class Triple; class Type; /// \brief Helper class for helping synchronize access to the global address map @@ -132,14 +135,12 @@ class ExecutionEngine { Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM); static ExecutionEngine *(*MCJITCtor)( Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM); static ExecutionEngine *(*InterpCtor)(Module *M, std::string *ErrorStr); @@ -228,6 +229,26 @@ class ExecutionEngine { virtual GenericValue runFunction(Function *F, const std::vector &ArgValues) = 0; + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the dlsym function call. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) = 0; + + /// mapSectionAddress - map a section to its target address space value. + /// Map the address of a JIT section as returned from the memory manager + /// to the address in the target process as the running code will see it. + /// This is the address which will be used for relocation resolution. + virtual void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) { + llvm_unreachable("Re-mapping of section addresses not supported with this " + "EE!"); + } + /// runStaticConstructorsDestructors - This method is used to execute all of /// the static constructors or destructors for a program. /// @@ -462,6 +483,7 @@ class EngineBuilder { CodeGenOpt::Level OptLevel; JITMemoryManager *JMM; bool AllocateGVsWithCode; + TargetOptions Options; Reloc::Model RelocModel; CodeModel::Model CMModel; std::string MArch; @@ -475,6 +497,7 @@ class EngineBuilder { ErrorStr = NULL; OptLevel = CodeGenOpt::Default; JMM = NULL; + Options = TargetOptions(); AllocateGVsWithCode = false; RelocModel = Reloc::Default; CMModel = CodeModel::JITDefault; @@ -518,6 +541,13 @@ class EngineBuilder { return *this; } + /// setTargetOptions - Set the target options that the ExecutionEngine + /// target is using. Defaults to TargetOptions(). 
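// [Editorial sketch] In the ExecutionEngine.h hunks above, EngineBuilder now
// carries a TargetOptions and selectTarget() becomes a member function.
// Hypothetical JIT setup; "M" is a Module* and "Err" a caller-owned std::string:
#include "llvm/ExecutionEngine/ExecutionEngine.h"
static llvm::ExecutionEngine *buildJIT(llvm::Module *M, std::string &Err) {
  using namespace llvm;
  TargetOptions Opts;                    // defaults; adjust fields as needed
  return EngineBuilder(M)
             .setErrorStr(&Err)
             .setTargetOptions(Opts)     // new in this import
             .setOptLevel(CodeGenOpt::Default)
             .create();                  // now equivalent to create(selectTarget())
}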
+ EngineBuilder &setTargetOptions(const TargetOptions &Opts) { + Options = Opts; + return *this; + } + /// setRelocationModel - Set the relocation model that the ExecutionEngine /// target is using. Defaults to target specific default "Reloc::Default". EngineBuilder &setRelocationModel(Reloc::Model RM) { @@ -572,17 +602,20 @@ class EngineBuilder { return *this; } + TargetMachine *selectTarget(); + /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. - static TargetMachine *selectTarget(Module *M, - StringRef MArch, - StringRef MCPU, - const SmallVectorImpl& MAttrs, - Reloc::Model RM, - CodeModel::Model CM, - std::string *Err); + TargetMachine *selectTarget(const Triple &TargetTriple, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl& MAttrs); - ExecutionEngine *create(); + ExecutionEngine *create() { + return create(selectTarget()); + } + + ExecutionEngine *create(TargetMachine *TM); }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h b/contrib/llvm/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h new file mode 100644 index 000000000000..ca873420299c --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h @@ -0,0 +1,102 @@ +//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a wrapper for the Intel JIT Events API. It allows for the +// implementation of the jitprofiling library to be swapped with an alternative +// implementation (for testing). To include this file, you must have the +// jitprofiling.h header available; it is available in Intel(R) VTune(TM) +// Amplifier XE 2011. 
+// +//===----------------------------------------------------------------------===// + +#ifndef INTEL_JIT_EVENTS_WRAPPER_H +#define INTEL_JIT_EVENTS_WRAPPER_H + +#include + +namespace llvm { + +class IntelJITEventsWrapper { + // Function pointer types for testing implementation of Intel jitprofiling + // library + typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*); + typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx ); + typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void); + typedef void (*FinalizeThreadPtr)(void); + typedef void (*FinalizeProcessPtr)(void); + typedef unsigned int (*GetNewMethodIDPtr)(void); + + NotifyEventPtr NotifyEventFunc; + RegisterCallbackExPtr RegisterCallbackExFunc; + IsProfilingActivePtr IsProfilingActiveFunc; + FinalizeThreadPtr FinalizeThreadFunc; + FinalizeProcessPtr FinalizeProcessFunc; + GetNewMethodIDPtr GetNewMethodIDFunc; + +public: + bool isAmplifierRunning() { + return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON; + } + + IntelJITEventsWrapper() + : NotifyEventFunc(::iJIT_NotifyEvent), + RegisterCallbackExFunc(::iJIT_RegisterCallbackEx), + IsProfilingActiveFunc(::iJIT_IsProfilingActive), + FinalizeThreadFunc(::FinalizeThread), + FinalizeProcessFunc(::FinalizeProcess), + GetNewMethodIDFunc(::iJIT_GetNewMethodID) { + } + + IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl, + RegisterCallbackExPtr RegisterCallbackExImpl, + IsProfilingActivePtr IsProfilingActiveImpl, + FinalizeThreadPtr FinalizeThreadImpl, + FinalizeProcessPtr FinalizeProcessImpl, + GetNewMethodIDPtr GetNewMethodIDImpl) + : NotifyEventFunc(NotifyEventImpl), + RegisterCallbackExFunc(RegisterCallbackExImpl), + IsProfilingActiveFunc(IsProfilingActiveImpl), + FinalizeThreadFunc(FinalizeThreadImpl), + FinalizeProcessFunc(FinalizeProcessImpl), + GetNewMethodIDFunc(GetNewMethodIDImpl) { + } + + // Sends an event anncouncing that a function has been emitted + // return values are event-specific. See Intel documentation for details. 
+ int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) { + if (!NotifyEventFunc) + return -1; + return NotifyEventFunc(EventType, EventSpecificData); + } + + // Registers a callback function to receive notice of profiling state changes + void iJIT_RegisterCallbackEx(void *UserData, + iJIT_ModeChangedEx NewModeCallBackFuncEx) { + if (RegisterCallbackExFunc) + RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx); + } + + // Returns the current profiler mode + iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) { + if (!IsProfilingActiveFunc) + return iJIT_NOTHING_RUNNING; + return IsProfilingActiveFunc(); + } + + // Generates a locally unique method ID for use in code registration + unsigned int iJIT_GetNewMethodID(void) { + if (!GetNewMethodIDFunc) + return -1; + return GetNewMethodIDFunc(); + } +}; + +} //namespace llvm + +#endif //INTEL_JIT_EVENTS_WRAPPER_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h index abc063b07038..eea603fcee2c 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h @@ -15,6 +15,7 @@ #ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H #define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H +#include "llvm/Config/config.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/DebugLoc.h" @@ -23,6 +24,8 @@ namespace llvm { class Function; class MachineFunction; +class OProfileWrapper; +class IntelJITEventsWrapper; /// JITEvent_EmittedFunctionDetails - Helper struct for containing information /// about a generated machine code function. @@ -59,9 +62,9 @@ class JITEventListener { /// NotifyFunctionEmitted - Called after a function has been successfully /// emitted to memory. The function still has its MachineFunction attached, /// if you should happen to need that. - virtual void NotifyFunctionEmitted(const Function &F, - void *Code, size_t Size, - const EmittedFunctionDetails &Details) {} + virtual void NotifyFunctionEmitted(const Function &, + void *, size_t, + const EmittedFunctionDetails &) {} /// NotifyFreeingMachineCode - Called from freeMachineCodeForFunction(), after /// the global mapping is removed, but before the machine code is returned to @@ -71,12 +74,43 @@ class JITEventListener { /// parameter to a previous NotifyFunctionEmitted call. The Function passed /// to NotifyFunctionEmitted may have been destroyed by the time of the /// matching NotifyFreeingMachineCode call. - virtual void NotifyFreeingMachineCode(void *OldPtr) {} -}; + virtual void NotifyFreeingMachineCode(void *) {} -// This returns NULL if support isn't available. 
-JITEventListener *createOProfileJITEventListener(); +#if LLVM_USE_INTEL_JITEVENTS + // Construct an IntelJITEventListener + static JITEventListener *createIntelJITEventListener(); + + // Construct an IntelJITEventListener with a test Intel JIT API implementation + static JITEventListener *createIntelJITEventListener( + IntelJITEventsWrapper* AlternativeImpl); +#else + static JITEventListener *createIntelJITEventListener() { return 0; } + + static JITEventListener *createIntelJITEventListener( + IntelJITEventsWrapper* AlternativeImpl) { + return 0; + } +#endif // USE_INTEL_JITEVENTS + +#if LLVM_USE_OPROFILE + // Construct an OProfileJITEventListener + static JITEventListener *createOProfileJITEventListener(); + + // Construct an OProfileJITEventListener with a test opagent implementation + static JITEventListener *createOProfileJITEventListener( + OProfileWrapper* AlternativeImpl); +#else + + static JITEventListener *createOProfileJITEventListener() { return 0; } + + static JITEventListener *createOProfileJITEventListener( + OProfileWrapper* AlternativeImpl) { + return 0; + } +#endif // USE_OPROFILE + +}; } // end namespace llvm. -#endif +#endif // defined LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/JITMemoryManager.h index a63f0da773a2..4c75b6ab970e 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/JITMemoryManager.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/JITMemoryManager.h @@ -47,6 +47,17 @@ class JITMemoryManager { /// debugging, and may be turned on by default in debug mode. virtual void setPoisonMemory(bool poison) = 0; + /// getPointerToNamedFunction - This method returns the address of the + /// specified function. As such it is only useful for resolving library + /// symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) = 0; + //===--------------------------------------------------------------------===// // Global Offset Table Management //===--------------------------------------------------------------------===// @@ -101,6 +112,22 @@ class JITMemoryManager { virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart, uint8_t *FunctionEnd) = 0; + /// allocateCodeSection - Allocate a memory block of (at least) the given + /// size suitable for executable code. The SectionID is a unique identifier + /// assigned by the JIT and passed through to the memory manager for + /// the instance class to use if it needs to communicate to the JIT about + /// a given section after the fact. + virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) = 0; + + /// allocateDataSection - Allocate a memory block of (at least) the given + /// size suitable for data. The SectionID is a unique identifier + /// assigned by the JIT and passed through to the memory manager for + /// the instance class to use if it needs to communicate to the JIT about + /// a given section after the fact. + virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) = 0; + /// allocateSpace - Allocate a memory block of the given size. This method /// cannot be called between calls to startFunctionBody and endFunctionBody. 
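// Illustrative sketch, not part of this diff: the kind of aligned allocation an
// allocateCodeSection()/allocateDataSection() override is expected to perform.
// The helper name and the use of posix_memalign are assumptions for the
// example; a real memory manager would also record the SectionID so it can
// refer back to the block later, and would make code sections executable.
#include <stdlib.h>
#include <stdint.h>

static uint8_t *allocateAlignedBlock(uintptr_t Size, unsigned Alignment) {
  if (Alignment < sizeof(void*))
    Alignment = sizeof(void*);     // posix_memalign's minimum alignment
  void *Mem = 0;
  if (posix_memalign(&Mem, Alignment, Size) != 0)
    return 0;                      // allocation failed
  return (uint8_t *)Mem;
}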
virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0; diff --git a/contrib/llvm/include/llvm/ExecutionEngine/OProfileWrapper.h b/contrib/llvm/include/llvm/ExecutionEngine/OProfileWrapper.h new file mode 100644 index 000000000000..ab7f25e9d03d --- /dev/null +++ b/contrib/llvm/include/llvm/ExecutionEngine/OProfileWrapper.h @@ -0,0 +1,124 @@ +//===-- OProfileWrapper.h - OProfile JIT API Wrapper ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file defines a OProfileWrapper object that detects if the oprofile +// daemon is running, and provides wrappers for opagent functions used to +// communicate with the oprofile JIT interface. The dynamic library libopagent +// does not need to be linked directly as this object lazily loads the library +// when the first op_ function is called. +// +// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the +// definition of the interface. +// +//===----------------------------------------------------------------------===// + +#ifndef OPROFILE_WRAPPER_H +#define OPROFILE_WRAPPER_H + +#include "llvm/Support/DataTypes.h" +#include + +namespace llvm { + + +class OProfileWrapper { + typedef op_agent_t (*op_open_agent_ptr_t)(); + typedef int (*op_close_agent_ptr_t)(op_agent_t); + typedef int (*op_write_native_code_ptr_t)(op_agent_t, + const char*, + uint64_t, + void const*, + const unsigned int); + typedef int (*op_write_debug_line_info_ptr_t)(op_agent_t, + void const*, + size_t, + struct debug_line_info const*); + typedef int (*op_unload_native_code_ptr_t)(op_agent_t, uint64_t); + + // Also used for op_minor_version function which has the same signature + typedef int (*op_major_version_ptr_t)(void); + + // This is not a part of the opagent API, but is useful nonetheless + typedef bool (*IsOProfileRunningPtrT)(void); + + + op_agent_t Agent; + op_open_agent_ptr_t OpenAgentFunc; + op_close_agent_ptr_t CloseAgentFunc; + op_write_native_code_ptr_t WriteNativeCodeFunc; + op_write_debug_line_info_ptr_t WriteDebugLineInfoFunc; + op_unload_native_code_ptr_t UnloadNativeCodeFunc; + op_major_version_ptr_t MajorVersionFunc; + op_major_version_ptr_t MinorVersionFunc; + IsOProfileRunningPtrT IsOProfileRunningFunc; + + bool Initialized; + +public: + OProfileWrapper(); + + // For testing with a mock opagent implementation, skips the dynamic load and + // the function resolution. + OProfileWrapper(op_open_agent_ptr_t OpenAgentImpl, + op_close_agent_ptr_t CloseAgentImpl, + op_write_native_code_ptr_t WriteNativeCodeImpl, + op_write_debug_line_info_ptr_t WriteDebugLineInfoImpl, + op_unload_native_code_ptr_t UnloadNativeCodeImpl, + op_major_version_ptr_t MajorVersionImpl, + op_major_version_ptr_t MinorVersionImpl, + IsOProfileRunningPtrT MockIsOProfileRunningImpl = 0) + : OpenAgentFunc(OpenAgentImpl), + CloseAgentFunc(CloseAgentImpl), + WriteNativeCodeFunc(WriteNativeCodeImpl), + WriteDebugLineInfoFunc(WriteDebugLineInfoImpl), + UnloadNativeCodeFunc(UnloadNativeCodeImpl), + MajorVersionFunc(MajorVersionImpl), + MinorVersionFunc(MinorVersionImpl), + IsOProfileRunningFunc(MockIsOProfileRunningImpl), + Initialized(true) + { + } + + // Calls op_open_agent in the oprofile JIT library and saves the returned + // op_agent_t handle internally so it can be used when calling all the other + // op_* functions. 
Callers of this class do not need to keep track of + // op_agent_t objects. + bool op_open_agent(); + + int op_close_agent(); + int op_write_native_code(const char* name, + uint64_t addr, + void const* code, + const unsigned int size); + int op_write_debug_line_info(void const* code, + size_t num_entries, + struct debug_line_info const* info); + int op_unload_native_code(uint64_t addr); + int op_major_version(void); + int op_minor_version(void); + + // Returns true if the oprofiled process is running, the opagent library is + // loaded and a connection to the agent has been established, and false + // otherwise. + bool isAgentAvailable(); + +private: + // Loads the libopagent library and initializes this wrapper if the oprofile + // daemon is running + bool initialize(); + + // Searches /proc for the oprofile daemon and returns true if the process if + // found, or false otherwise. + bool checkForOProfileProcEntry(); + + bool isOProfileRunning(); +}; + +} // namespace llvm + +#endif //OPROFILE_WRAPPER_H diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h index 724b9f09e0e5..54c28f3ec142 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -35,15 +35,18 @@ class RTDyldMemoryManager { RTDyldMemoryManager() {} virtual ~RTDyldMemoryManager(); - // Allocate ActualSize bytes, or more, for the named function. Return - // a pointer to the allocated memory and update Size to reflect how much - // memory was acutally allocated. - virtual uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) = 0; + /// allocateCodeSection - Allocate a memory block of (at least) the given + /// size suitable for executable code. + virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) = 0; - // Mark the end of the function, including how much of the allocated - // memory was actually used. - virtual void endFunctionBody(const char *Name, uint8_t *FunctionStart, - uint8_t *FunctionEnd) = 0; + /// allocateDataSection - Allocate a memory block of (at least) the given + /// size suitable for data. + virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) = 0; + + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) = 0; }; class RuntimeDyld { @@ -54,6 +57,10 @@ class RuntimeDyld { // interface. RuntimeDyldImpl *Dyld; RTDyldMemoryManager *MM; +protected: + // Change the address associated with a section when resolving relocations. + // Any relocations already associated with the symbol will be re-resolved. + void reassignSectionAddress(unsigned SectionID, uint64_t Addr); public: RuntimeDyld(RTDyldMemoryManager*); ~RuntimeDyld(); @@ -65,9 +72,13 @@ class RuntimeDyld { void *getSymbolAddress(StringRef Name); // Resolve the relocations for all symbols we currently know about. void resolveRelocations(); - // Change the address associated with a symbol when resolving relocations. - // Any relocations already associated with the symbol will be re-resolved. - void reassignSymbolAddress(StringRef Name, uint8_t *Addr); + + /// mapSectionAddress - map a section to its target address space value. + /// Map the address of a JIT section as returned from the memory manager + /// to the address in the target process as the running code will see it. + /// This is the address which will be used for relocation resolution. 
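// Illustrative sketch, not part of this diff: how the mapSectionAddress() hook
// described above is meant to be used when code is loaded at one address but
// will run at another (for example in a remote target). 'Dyld', 'LocalAddr'
// and 'TargetAddr' are assumptions for the example.
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include <stdint.h>

void fixupForTarget(llvm::RuntimeDyld &Dyld, void *LocalAddr,
                    uint64_t TargetAddr) {
  // Record the address the section will have in the target process, then
  // re-resolve relocations against it.
  Dyld.mapSectionAddress(LocalAddr, TargetAddr);
  Dyld.resolveRelocations();
}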
+ void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress); + StringRef getErrorString(); }; diff --git a/contrib/llvm/include/llvm/Function.h b/contrib/llvm/include/llvm/Function.h index 678651bbf1f8..e17cd87fe348 100644 --- a/contrib/llvm/include/llvm/Function.h +++ b/contrib/llvm/include/llvm/Function.h @@ -146,7 +146,7 @@ class Function : public GlobalValue, /// The particular intrinsic functions which correspond to this value are /// defined in llvm/Intrinsics.h. /// - unsigned getIntrinsicID() const LLVM_ATTRIBUTE_READONLY; + unsigned getIntrinsicID() const LLVM_READONLY; bool isIntrinsic() const { return getIntrinsicID() != 0; } /// getCallingConv()/setCallingConv(CC) - These method get and set the @@ -425,6 +425,12 @@ class Function : public GlobalValue, /// bool hasAddressTaken(const User** = 0) const; + /// isDefTriviallyDead - Return true if it is trivially safe to remove + /// this function definition from the module (because it isn't externally + /// visible, does not have its address taken, and has no callers). To make + /// this more accurate, call removeDeadConstantUsers first. + bool isDefTriviallyDead() const; + /// callsFunctionThatReturnsTwice - Return true if the function has a call to /// setjmp or other function that gcc recognizes as "returning twice". bool callsFunctionThatReturnsTwice() const; diff --git a/contrib/llvm/include/llvm/GlobalValue.h b/contrib/llvm/include/llvm/GlobalValue.h index 63dc4ab6bae0..81a11a4c9258 100644 --- a/contrib/llvm/include/llvm/GlobalValue.h +++ b/contrib/llvm/include/llvm/GlobalValue.h @@ -59,19 +59,18 @@ class GlobalValue : public Constant { protected: GlobalValue(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps, LinkageTypes linkage, const Twine &Name) - : Constant(ty, vty, Ops, NumOps), Parent(0), - Linkage(linkage), Visibility(DefaultVisibility), Alignment(0), - UnnamedAddr(0) { + : Constant(ty, vty, Ops, NumOps), Linkage(linkage), + Visibility(DefaultVisibility), Alignment(0), UnnamedAddr(0), Parent(0) { setName(Name); } - Module *Parent; // Note: VC++ treats enums as signed, so an extra bit is required to prevent // Linkage and Visibility from turning into negative values. LinkageTypes Linkage : 5; // The linkage of this global unsigned Visibility : 2; // The visibility style of this global unsigned Alignment : 16; // Alignment of this symbol, must be power of two unsigned UnnamedAddr : 1; // This value's address is not significant + Module *Parent; // The containing module. std::string Section; // Section to emit this into, empty mean default public: ~GlobalValue() { diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h index c91fbf8de812..33d20435de1d 100644 --- a/contrib/llvm/include/llvm/InitializePasses.h +++ b/contrib/llvm/include/llvm/InitializePasses.h @@ -31,6 +31,10 @@ void initializeTransformUtils(PassRegistry&); /// ScalarOpts library. void initializeScalarOpts(PassRegistry&); +/// initializeVectorization - Initialize all passes linked into the +/// Vectorize library. +void initializeVectorization(PassRegistry&); + /// initializeInstCombine - Initialize all passes linked into the /// ScalarOpts library. 
void initializeInstCombine(PassRegistry&); @@ -67,6 +71,7 @@ void initializeBasicCallGraphPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoPass(PassRegistry&); void initializeBlockPlacementPass(PassRegistry&); +void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); void initializeCFGOnlyPrinterPass(PassRegistry&); @@ -77,8 +82,10 @@ void initializeCFGViewerPass(PassRegistry&); void initializeCalculateSpillWeightsPass(PassRegistry&); void initializeCallGraphAnalysisGroup(PassRegistry&); void initializeCodeGenPreparePass(PassRegistry&); +void initializeCodePlacementOptPass(PassRegistry&); void initializeConstantMergePass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); +void initializeMachineCopyPropagationPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); @@ -94,12 +101,17 @@ void initializeDominanceFrontierPass(PassRegistry&); void initializeDominatorTreePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEdgeProfilerPass(PassRegistry&); +void initializeExpandPostRAPass(PassRegistry&); void initializePathProfilerPass(PassRegistry&); void initializeGCOVProfilerPass(PassRegistry&); +void initializeAddressSanitizerPass(PassRegistry&); +void initializeThreadSanitizerPass(PassRegistry&); void initializeEarlyCSEPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); void initializeFindUsedTypesPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); +void initializeGCInfoDeleterPass(PassRegistry&); +void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); void initializeGlobalDCEPass(PassRegistry&); @@ -127,6 +139,7 @@ void initializeLiveStacksPass(PassRegistry&); void initializeLiveVariablesPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); void initializePathProfileLoaderPassPass(PassRegistry&); +void initializeLocalStackSlotPassPass(PassRegistry&); void initializeLoopDeletionPass(PassRegistry&); void initializeLoopDependenceAnalysisPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); @@ -134,8 +147,8 @@ void initializeLoopInfoPass(PassRegistry&); void initializeLoopInstSimplifyPass(PassRegistry&); void initializeLoopRotatePass(PassRegistry&); void initializeLoopSimplifyPass(PassRegistry&); -void initializeLoopSplitterPass(PassRegistry&); void initializeLoopStrengthReducePass(PassRegistry&); +void initializeGlobalMergePass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); @@ -145,6 +158,8 @@ void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokePass(PassRegistry&); void initializeLowerSwitchPass(PassRegistry&); void initializeMachineBlockFrequencyInfoPass(PassRegistry&); +void initializeMachineBlockPlacementPass(PassRegistry&); +void initializeMachineBlockPlacementStatsPass(PassRegistry&); void initializeMachineBranchProbabilityInfoPass(PassRegistry&); void initializeMachineCSEPass(PassRegistry&); void initializeMachineDominatorTreePass(PassRegistry&); @@ -152,6 +167,7 @@ void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); void 
initializeMachineLoopRangesPass(PassRegistry&); void initializeMachineModuleInfoPass(PassRegistry&); +void initializeMachineSchedulerPass(PassRegistry&); void initializeMachineSinkingPass(PassRegistry&); void initializeMachineVerifierPassPass(PassRegistry&); void initializeMemCpyOptPass(PassRegistry&); @@ -163,6 +179,7 @@ void initializeNoAAPass(PassRegistry&); void initializeNoProfileInfoPass(PassRegistry&); void initializeNoPathProfileInfoPass(PassRegistry&); void initializeObjCARCAliasAnalysisPass(PassRegistry&); +void initializeObjCARCAPElimPass(PassRegistry&); void initializeObjCARCExpandPass(PassRegistry&); void initializeObjCARCContractPass(PassRegistry&); void initializeObjCARCOptPass(PassRegistry&); @@ -177,6 +194,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&); void initializePostDomPrinterPass(PassRegistry&); void initializePostDomViewerPass(PassRegistry&); void initializePostDominatorTreePass(PassRegistry&); +void initializePostRASchedulerPass(PassRegistry&); void initializePreVerifierPass(PassRegistry&); void initializePrintDbgInfoPass(PassRegistry&); void initializePrintFunctionPassPass(PassRegistry&); @@ -189,7 +207,6 @@ void initializePathProfileVerifierPass(PassRegistry&); void initializeProfileVerifierPassPass(PassRegistry&); void initializePromotePassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); -void initializeRALinScanPass(PassRegistry&); void initializeReassociatePass(PassRegistry&); void initializeRegToMemPass(PassRegistry&); void initializeRegionInfoPass(PassRegistry&); @@ -219,6 +236,8 @@ void initializeStripNonDebugSymbolsPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); void initializeStrongPHIEliminationPass(PassRegistry&); void initializeTailCallElimPass(PassRegistry&); +void initializeTailDuplicatePassPass(PassRegistry&); +void initializeTargetPassConfigPass(PassRegistry&); void initializeTargetDataPass(PassRegistry&); void initializeTargetLibraryInfoPass(PassRegistry&); void initializeTwoAddressInstructionPassPass(PassRegistry&); @@ -229,7 +248,9 @@ void initializeUnreachableMachineBlockElimPass(PassRegistry&); void initializeVerifierPass(PassRegistry&); void initializeVirtRegMapPass(PassRegistry&); void initializeInstSimplifierPass(PassRegistry&); - +void initializeUnpackMachineBundlesPass(PassRegistry&); +void initializeFinalizeMachineBundlesPass(PassRegistry&); +void initializeBBVectorizePass(PassRegistry&); } #endif diff --git a/contrib/llvm/include/llvm/InlineAsm.h b/contrib/llvm/include/llvm/InlineAsm.h index de5ce4ecafc7..37aa18bfff73 100644 --- a/contrib/llvm/include/llvm/InlineAsm.h +++ b/contrib/llvm/include/llvm/InlineAsm.h @@ -17,6 +17,7 @@ #define LLVM_INLINEASM_H #include "llvm/Value.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { diff --git a/contrib/llvm/include/llvm/InstrTypes.h b/contrib/llvm/include/llvm/InstrTypes.h index a1492f3c141a..2529f24fe991 100644 --- a/contrib/llvm/include/llvm/InstrTypes.h +++ b/contrib/llvm/include/llvm/InstrTypes.h @@ -388,6 +388,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) /// if (isa(Instr)) { ... } /// @brief Base class of casting instructions. 
class CastInst : public UnaryInstruction { + virtual void anchor(); protected: /// @brief Constructor with insert-before-instruction semantics for subclasses CastInst(Type *Ty, unsigned iType, Value *S, diff --git a/contrib/llvm/include/llvm/Instruction.def b/contrib/llvm/include/llvm/Instruction.def index d36e4be1d912..e59a0528e90f 100644 --- a/contrib/llvm/include/llvm/Instruction.def +++ b/contrib/llvm/include/llvm/Instruction.def @@ -99,81 +99,80 @@ HANDLE_TERM_INST ( 2, Br , BranchInst) HANDLE_TERM_INST ( 3, Switch , SwitchInst) HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst) HANDLE_TERM_INST ( 5, Invoke , InvokeInst) -HANDLE_TERM_INST ( 6, Unwind , UnwindInst) -HANDLE_TERM_INST ( 7, Resume , ResumeInst) -HANDLE_TERM_INST ( 8, Unreachable, UnreachableInst) - LAST_TERM_INST ( 8) +HANDLE_TERM_INST ( 6, Resume , ResumeInst) +HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst) + LAST_TERM_INST ( 7) // Standard binary operators... - FIRST_BINARY_INST( 9) -HANDLE_BINARY_INST( 9, Add , BinaryOperator) -HANDLE_BINARY_INST(10, FAdd , BinaryOperator) -HANDLE_BINARY_INST(11, Sub , BinaryOperator) -HANDLE_BINARY_INST(12, FSub , BinaryOperator) -HANDLE_BINARY_INST(13, Mul , BinaryOperator) -HANDLE_BINARY_INST(14, FMul , BinaryOperator) -HANDLE_BINARY_INST(15, UDiv , BinaryOperator) -HANDLE_BINARY_INST(16, SDiv , BinaryOperator) -HANDLE_BINARY_INST(17, FDiv , BinaryOperator) -HANDLE_BINARY_INST(18, URem , BinaryOperator) -HANDLE_BINARY_INST(19, SRem , BinaryOperator) -HANDLE_BINARY_INST(20, FRem , BinaryOperator) + FIRST_BINARY_INST( 8) +HANDLE_BINARY_INST( 8, Add , BinaryOperator) +HANDLE_BINARY_INST( 9, FAdd , BinaryOperator) +HANDLE_BINARY_INST(10, Sub , BinaryOperator) +HANDLE_BINARY_INST(11, FSub , BinaryOperator) +HANDLE_BINARY_INST(12, Mul , BinaryOperator) +HANDLE_BINARY_INST(13, FMul , BinaryOperator) +HANDLE_BINARY_INST(14, UDiv , BinaryOperator) +HANDLE_BINARY_INST(15, SDiv , BinaryOperator) +HANDLE_BINARY_INST(16, FDiv , BinaryOperator) +HANDLE_BINARY_INST(17, URem , BinaryOperator) +HANDLE_BINARY_INST(18, SRem , BinaryOperator) +HANDLE_BINARY_INST(19, FRem , BinaryOperator) // Logical operators (integer operands) -HANDLE_BINARY_INST(21, Shl , BinaryOperator) // Shift left (logical) -HANDLE_BINARY_INST(22, LShr , BinaryOperator) // Shift right (logical) -HANDLE_BINARY_INST(23, AShr , BinaryOperator) // Shift right (arithmetic) -HANDLE_BINARY_INST(24, And , BinaryOperator) -HANDLE_BINARY_INST(25, Or , BinaryOperator) -HANDLE_BINARY_INST(26, Xor , BinaryOperator) - LAST_BINARY_INST(26) +HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical) +HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical) +HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic) +HANDLE_BINARY_INST(23, And , BinaryOperator) +HANDLE_BINARY_INST(24, Or , BinaryOperator) +HANDLE_BINARY_INST(25, Xor , BinaryOperator) + LAST_BINARY_INST(25) // Memory operators... 
- FIRST_MEMORY_INST(27) -HANDLE_MEMORY_INST(27, Alloca, AllocaInst) // Stack management -HANDLE_MEMORY_INST(28, Load , LoadInst ) // Memory manipulation instrs -HANDLE_MEMORY_INST(29, Store , StoreInst ) -HANDLE_MEMORY_INST(30, GetElementPtr, GetElementPtrInst) -HANDLE_MEMORY_INST(31, Fence , FenceInst ) -HANDLE_MEMORY_INST(32, AtomicCmpXchg , AtomicCmpXchgInst ) -HANDLE_MEMORY_INST(33, AtomicRMW , AtomicRMWInst ) - LAST_MEMORY_INST(33) + FIRST_MEMORY_INST(26) +HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management +HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs +HANDLE_MEMORY_INST(28, Store , StoreInst ) +HANDLE_MEMORY_INST(29, GetElementPtr, GetElementPtrInst) +HANDLE_MEMORY_INST(30, Fence , FenceInst ) +HANDLE_MEMORY_INST(31, AtomicCmpXchg , AtomicCmpXchgInst ) +HANDLE_MEMORY_INST(32, AtomicRMW , AtomicRMWInst ) + LAST_MEMORY_INST(32) // Cast operators ... // NOTE: The order matters here because CastInst::isEliminableCastPair // NOTE: (see Instructions.cpp) encodes a table based on this ordering. - FIRST_CAST_INST(34) -HANDLE_CAST_INST(34, Trunc , TruncInst ) // Truncate integers -HANDLE_CAST_INST(35, ZExt , ZExtInst ) // Zero extend integers -HANDLE_CAST_INST(36, SExt , SExtInst ) // Sign extend integers -HANDLE_CAST_INST(37, FPToUI , FPToUIInst ) // floating point -> UInt -HANDLE_CAST_INST(38, FPToSI , FPToSIInst ) // floating point -> SInt -HANDLE_CAST_INST(39, UIToFP , UIToFPInst ) // UInt -> floating point -HANDLE_CAST_INST(40, SIToFP , SIToFPInst ) // SInt -> floating point -HANDLE_CAST_INST(41, FPTrunc , FPTruncInst ) // Truncate floating point -HANDLE_CAST_INST(42, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(43, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(44, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(45, BitCast , BitCastInst ) // Type cast - LAST_CAST_INST(45) + FIRST_CAST_INST(33) +HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast + LAST_CAST_INST(44) // Other operators... - FIRST_OTHER_INST(46) -HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr. 
-HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. - LAST_OTHER_INST(59) + FIRST_OTHER_INST(45) +HANDLE_OTHER_INST(45, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(46, FCmp , FCmpInst ) // Floating point comparison instr. +HANDLE_OTHER_INST(47, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(48, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(49, Select , SelectInst ) // select instruction +HANDLE_OTHER_INST(50, UserOp1, Instruction) // May be used internally in a pass +HANDLE_OTHER_INST(51, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(52, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(53, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(54, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(55, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(56, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(57, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(58, LandingPad, LandingPadInst) // Landing pad instruction. + LAST_OTHER_INST(58) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST diff --git a/contrib/llvm/include/llvm/Instruction.h b/contrib/llvm/include/llvm/Instruction.h index 934e890151f0..9c5ac4430f89 100644 --- a/contrib/llvm/include/llvm/Instruction.h +++ b/contrib/llvm/include/llvm/Instruction.h @@ -143,7 +143,7 @@ class Instruction : public User, public ilist_node { /// getMetadata - Get the metadata of given kind attached to this Instruction. /// If the metadata is not found then return null. - MDNode *getMetadata(const char *Kind) const { + MDNode *getMetadata(StringRef Kind) const { if (!hasMetadata()) return 0; return getMetadataImpl(Kind); } @@ -168,7 +168,7 @@ class Instruction : public User, public ilist_node { /// node. This updates/replaces metadata if already present, or removes it if /// Node is null. void setMetadata(unsigned KindID, MDNode *Node); - void setMetadata(const char *Kind, MDNode *Node); + void setMetadata(StringRef Kind, MDNode *Node); /// setDebugLoc - Set the debug location information for this instruction. void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; } @@ -185,7 +185,7 @@ class Instruction : public User, public ilist_node { // These are all implemented in Metadata.cpp. 
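// Illustrative sketch, not part of this diff: the StringRef overloads above let
// instruction metadata be read and written by kind name directly. 'I' and
// 'Node' are an assumed existing Instruction* and MDNode*.
#include "llvm/Instruction.h"
#include "llvm/Metadata.h"

void tagNonTemporal(llvm::Instruction *I, llvm::MDNode *Node) {
  I->setMetadata("nontemporal", Node);              // attach by kind name
  llvm::MDNode *MD = I->getMetadata("nontemporal"); // read it back
  (void)MD;
}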
MDNode *getMetadataImpl(unsigned KindID) const; - MDNode *getMetadataImpl(const char *Kind) const; + MDNode *getMetadataImpl(StringRef Kind) const; void getAllMetadataImpl(SmallVectorImpl > &)const; void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl > &) const; @@ -244,26 +244,6 @@ class Instruction : public User, public ilist_node { return mayWriteToMemory() || mayThrow(); } - /// isSafeToSpeculativelyExecute - Return true if the instruction does not - /// have any effects besides calculating the result and does not have - /// undefined behavior. - /// - /// This method never returns true for an instruction that returns true for - /// mayHaveSideEffects; however, this method also does some other checks in - /// addition. It checks for undefined behavior, like dividing by zero or - /// loading from an invalid pointer (but not for undefined results, like a - /// shift with a shift amount larger than the width of the result). It checks - /// for malloc and alloca because speculatively executing them might cause a - /// memory leak. It also returns false for instructions related to control - /// flow, specifically terminators and PHI nodes. - /// - /// This method only looks at the instruction itself and its operands, so if - /// this method returns true, it is safe to move the instruction as long as - /// the correct dominance relationships for the operands and users hold. - /// However, this method can return true for instructions that read memory; - /// for such instructions, moving them may change the resulting value. - bool isSafeToSpeculativelyExecute() const; - /// clone() - Create a copy of 'this' instruction that is identical in all /// ways except the following: /// * The instruction has no parent diff --git a/contrib/llvm/include/llvm/Instructions.h b/contrib/llvm/include/llvm/Instructions.h index 3faab35bf687..f6eaf04fd0d9 100644 --- a/contrib/llvm/include/llvm/Instructions.h +++ b/contrib/llvm/include/llvm/Instructions.h @@ -776,6 +776,10 @@ class GetElementPtrInst : public Instruction { static Type *getIndexedType(Type *Ptr, ArrayRef IdxList); static Type *getIndexedType(Type *Ptr, ArrayRef IdxList); + /// getIndexedType - Returns the address space used by the GEP pointer. + /// + static unsigned getAddressSpace(Value *Ptr); + inline op_iterator idx_begin() { return op_begin()+1; } inline const_op_iterator idx_begin() const { return op_begin()+1; } inline op_iterator idx_end() { return op_end(); } @@ -788,7 +792,7 @@ class GetElementPtrInst : public Instruction { return getOperand(0); } static unsigned getPointerOperandIndex() { - return 0U; // get index for modifying correct operand + return 0U; // get index for modifying correct operand. } unsigned getPointerAddressSpace() const { @@ -797,10 +801,25 @@ class GetElementPtrInst : public Instruction { /// getPointerOperandType - Method to return the pointer operand as a /// PointerType. - PointerType *getPointerOperandType() const { - return reinterpret_cast(getPointerOperand()->getType()); + Type *getPointerOperandType() const { + return getPointerOperand()->getType(); } + /// GetGEPReturnType - Returns the pointer type returned by the GEP + /// instruction, which may be a vector of pointers. 
+ static Type *getGEPReturnType(Value *Ptr, ArrayRef IdxList) { + Type *PtrTy = PointerType::get(checkGEPType( + getIndexedType(Ptr->getType(), IdxList)), + getAddressSpace(Ptr)); + // Vector GEP + if (Ptr->getType()->isVectorTy()) { + unsigned NumElem = cast(Ptr->getType())->getNumElements(); + return VectorType::get(PtrTy, NumElem); + } + + // Scalar GEP + return PtrTy; + } unsigned getNumIndices() const { // Note: always non-negative return getNumOperands() - 1; @@ -847,10 +866,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, unsigned Values, const Twine &NameStr, Instruction *InsertBefore) - : Instruction(PointerType::get(checkGEPType( - getIndexedType(Ptr->getType(), IdxList)), - cast(Ptr->getType()) - ->getAddressSpace()), + : Instruction(getGEPReturnType(Ptr, IdxList), GetElementPtr, OperandTraits::op_end(this) - Values, Values, InsertBefore) { @@ -861,10 +877,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd) - : Instruction(PointerType::get(checkGEPType( - getIndexedType(Ptr->getType(), IdxList)), - cast(Ptr->getType()) - ->getAddressSpace()), + : Instruction(getGEPReturnType(Ptr, IdxList), GetElementPtr, OperandTraits::op_end(this) - Values, Values, InsertAtEnd) { @@ -905,7 +918,7 @@ class ICmpInst: public CmpInst { "Both operands to ICmp instruction are not of the same type!"); // Check that the operands are the right type assert((getOperand(0)->getType()->isIntOrIntVectorTy() || - getOperand(0)->getType()->isPointerTy()) && + getOperand(0)->getType()->getScalarType()->isPointerTy()) && "Invalid operand types for ICmp instruction"); } @@ -945,7 +958,7 @@ class ICmpInst: public CmpInst { "Both operands to ICmp instruction are not of the same type!"); // Check that the operands are the right type assert((getOperand(0)->getType()->isIntOrIntVectorTy() || - getOperand(0)->getType()->isPointerTy()) && + getOperand(0)->getType()->getScalarType()->isPointerTy()) && "Invalid operand types for ICmp instruction"); } @@ -1657,10 +1670,33 @@ class ShuffleVectorInst : public Instruction { /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + Constant *getMask() const { + return reinterpret_cast(getOperand(2)); + } + /// getMaskValue - Return the index from the shuffle mask for the specified /// output result. This is either -1 if the element is undef or a number less /// than 2*numelements. - int getMaskValue(unsigned i) const; + static int getMaskValue(Constant *Mask, unsigned i); + + int getMaskValue(unsigned i) const { + return getMaskValue(getMask(), i); + } + + /// getShuffleMask - Return the full mask for this instruction, where each + /// element is the element number and undef's are returned as -1. 
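// Illustrative sketch, not part of this diff: reading a shuffle's full mask
// through the accessors declared here. 'SVI' is an assumed ShuffleVectorInst*.
#include "llvm/Instructions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"

void printShuffleMask(llvm::ShuffleVectorInst *SVI) {
  llvm::SmallVector<int, 16> Mask;
  SVI->getShuffleMask(Mask);        // undef lanes come back as -1
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    llvm::errs() << Mask[i] << ' ';
  llvm::errs() << '\n';
}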
+ static void getShuffleMask(Constant *Mask, SmallVectorImpl &Result); + + void getShuffleMask(SmallVectorImpl &Result) const { + return getShuffleMask(getMask(), Result); + } + + SmallVector getShuffleMask() const { + SmallVector Mask; + getShuffleMask(Mask); + return Mask; + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const ShuffleVectorInst *) { return true; } @@ -2431,6 +2467,122 @@ class SwitchInst : public TerminatorInst { protected: virtual SwitchInst *clone_impl() const; public: + + // -2 + static const unsigned DefaultPseudoIndex = static_cast(~0L-1); + + template + class CaseIteratorT { + protected: + + SwitchInstTy *SI; + unsigned Index; + + public: + + typedef CaseIteratorT Self; + + /// Initializes case iterator for given SwitchInst and for given + /// case number. + CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) { + this->SI = SI; + Index = CaseNum; + } + + /// Initializes case iterator for given SwitchInst and for given + /// TerminatorInst's successor index. + static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) { + assert(SuccessorIndex < SI->getNumSuccessors() && + "Successor index # out of range!"); + return SuccessorIndex != 0 ? + Self(SI, SuccessorIndex - 1) : + Self(SI, DefaultPseudoIndex); + } + + /// Resolves case value for current case. + ConstantIntTy *getCaseValue() { + assert(Index < SI->getNumCases() && "Index out the number of cases."); + return reinterpret_cast(SI->getOperand(2 + Index*2)); + } + + /// Resolves successor for current case. + BasicBlockTy *getCaseSuccessor() { + assert((Index < SI->getNumCases() || + Index == DefaultPseudoIndex) && + "Index out the number of cases."); + return SI->getSuccessor(getSuccessorIndex()); + } + + /// Returns number of current case. + unsigned getCaseIndex() const { return Index; } + + /// Returns TerminatorInst's successor index for current case successor. + unsigned getSuccessorIndex() const { + assert((Index == DefaultPseudoIndex || Index < SI->getNumCases()) && + "Index out the number of cases."); + return Index != DefaultPseudoIndex ? Index + 1 : 0; + } + + Self operator++() { + // Check index correctness after increment. + // Note: Index == getNumCases() means end(). + assert(Index+1 <= SI->getNumCases() && "Index out the number of cases."); + ++Index; + return *this; + } + Self operator++(int) { + Self tmp = *this; + ++(*this); + return tmp; + } + Self operator--() { + // Check index correctness after decrement. + // Note: Index == getNumCases() means end(). + // Also allow "-1" iterator here. That will became valid after ++. + assert((Index == 0 || Index-1 <= SI->getNumCases()) && + "Index out the number of cases."); + --Index; + return *this; + } + Self operator--(int) { + Self tmp = *this; + --(*this); + return tmp; + } + bool operator==(const Self& RHS) const { + assert(RHS.SI == SI && "Incompatible operators."); + return RHS.Index == Index; + } + bool operator!=(const Self& RHS) const { + assert(RHS.SI == SI && "Incompatible operators."); + return RHS.Index != Index; + } + }; + + typedef CaseIteratorT + ConstCaseIt; + + class CaseIt : public CaseIteratorT { + + typedef CaseIteratorT ParentTy; + + public: + + CaseIt(const ParentTy& Src) : ParentTy(Src) {} + CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} + + /// Sets the new value for current case. 
+ void setValue(ConstantInt *V) { + assert(Index < SI->getNumCases() && "Index out the number of cases."); + SI->setOperand(2 + Index*2, reinterpret_cast(V)); + } + + /// Sets the new successor for current case. + void setSuccessor(BasicBlock *S) { + SI->setSuccessor(getSuccessorIndex(), S); + } + }; + static SwitchInst *Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore = 0) { return new SwitchInst(Value, Default, NumCases, InsertBefore); @@ -2439,6 +2591,7 @@ class SwitchInst : public TerminatorInst { unsigned NumCases, BasicBlock *InsertAtEnd) { return new SwitchInst(Value, Default, NumCases, InsertAtEnd); } + ~SwitchInst(); /// Provide fast operand accessors @@ -2452,61 +2605,94 @@ class SwitchInst : public TerminatorInst { return cast(getOperand(1)); } - /// getNumCases - return the number of 'cases' in this switch instruction. - /// Note that case #0 is always the default case. + void setDefaultDest(BasicBlock *DefaultCase) { + setOperand(1, reinterpret_cast(DefaultCase)); + } + + /// getNumCases - return the number of 'cases' in this switch instruction, + /// except the default case unsigned getNumCases() const { - return getNumOperands()/2; + return getNumOperands()/2 - 1; } - /// getCaseValue - Return the specified case value. Note that case #0, the - /// default destination, does not have a case value. - ConstantInt *getCaseValue(unsigned i) { - assert(i && i < getNumCases() && "Illegal case value to get!"); - return getSuccessorValue(i); + /// Returns a read/write iterator that points to the first + /// case in SwitchInst. + CaseIt case_begin() { + return CaseIt(this, 0); } - - /// getCaseValue - Return the specified case value. Note that case #0, the - /// default destination, does not have a case value. - const ConstantInt *getCaseValue(unsigned i) const { - assert(i && i < getNumCases() && "Illegal case value to get!"); - return getSuccessorValue(i); + /// Returns a read-only iterator that points to the first + /// case in the SwitchInst. + ConstCaseIt case_begin() const { + return ConstCaseIt(this, 0); } - + + /// Returns a read/write iterator that points one past the last + /// in the SwitchInst. + CaseIt case_end() { + return CaseIt(this, getNumCases()); + } + /// Returns a read-only iterator that points one past the last + /// in the SwitchInst. + ConstCaseIt case_end() const { + return ConstCaseIt(this, getNumCases()); + } + /// Returns an iterator that points to the default case. + /// Note: this iterator allows to resolve successor only. Attempt + /// to resolve case value causes an assertion. + /// Also note, that increment and decrement also causes an assertion and + /// makes iterator invalid. + CaseIt case_default() { + return CaseIt(this, DefaultPseudoIndex); + } + ConstCaseIt case_default() const { + return ConstCaseIt(this, DefaultPseudoIndex); + } + /// findCaseValue - Search all of the case values for the specified constant. - /// If it is explicitly handled, return the case number of it, otherwise - /// return 0 to indicate that it is handled by the default handler. - unsigned findCaseValue(const ConstantInt *C) const { - for (unsigned i = 1, e = getNumCases(); i != e; ++i) - if (getCaseValue(i) == C) + /// If it is explicitly handled, return the case iterator of it, otherwise + /// return default case iterator to indicate + /// that it is handled by the default handler. 
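// Illustrative sketch, not part of this diff: iterating a switch with the new
// case iterators instead of the removed index-based accessors. 'SI' is an
// assumed SwitchInst*.
#include "llvm/Instructions.h"

void visitCases(llvm::SwitchInst *SI) {
  for (llvm::SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
       i != e; ++i) {
    llvm::ConstantInt *CV = i.getCaseValue();       // constant matched by the case
    llvm::BasicBlock *Dest = i.getCaseSuccessor();  // block the case branches to
    (void)CV; (void)Dest;
  }
  // The default destination is still reachable through case_default().
  llvm::BasicBlock *Def = SI->case_default().getCaseSuccessor();
  (void)Def;
}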
+ CaseIt findCaseValue(const ConstantInt *C) { + for (CaseIt i = case_begin(), e = case_end(); i != e; ++i) + if (i.getCaseValue() == C) return i; - return 0; + return case_default(); } - + ConstCaseIt findCaseValue(const ConstantInt *C) const { + for (ConstCaseIt i = case_begin(), e = case_end(); i != e; ++i) + if (i.getCaseValue() == C) + return i; + return case_default(); + } + /// findCaseDest - Finds the unique case value for a given successor. Returns /// null if the successor is not found, not unique, or is the default case. ConstantInt *findCaseDest(BasicBlock *BB) { if (BB == getDefaultDest()) return NULL; ConstantInt *CI = NULL; - for (unsigned i = 1, e = getNumCases(); i != e; ++i) { - if (getSuccessor(i) == BB) { + for (CaseIt i = case_begin(), e = case_end(); i != e; ++i) { + if (i.getCaseSuccessor() == BB) { if (CI) return NULL; // Multiple cases lead to BB. - else CI = getCaseValue(i); + else CI = i.getCaseValue(); } } return CI; } /// addCase - Add an entry to the switch instruction... - /// + /// Note: + /// This action invalidates case_end(). Old case_end() iterator will + /// point to the added case. void addCase(ConstantInt *OnVal, BasicBlock *Dest); - /// removeCase - This method removes the specified successor from the switch - /// instruction. Note that this cannot be used to remove the default - /// destination (successor #0). Also note that this operation may reorder the + /// removeCase - This method removes the specified case and its successor + /// from the switch instruction. Note that this operation may reorder the /// remaining cases at index idx and above. - /// - void removeCase(unsigned idx); + /// Note: + /// This action invalidates iterators for all cases following the one removed, + /// including the case_end() iterator. + void removeCase(CaseIt i); unsigned getNumSuccessors() const { return getNumOperands()/2; } BasicBlock *getSuccessor(unsigned idx) const { @@ -2518,20 +2704,6 @@ class SwitchInst : public TerminatorInst { setOperand(idx*2+1, (Value*)NewSucc); } - // getSuccessorValue - Return the value associated with the specified - // successor. - ConstantInt *getSuccessorValue(unsigned idx) const { - assert(idx < getNumSuccessors() && "Successor # out of range!"); - return reinterpret_cast(getOperand(idx*2)); - } - - // setSuccessorValue - Updates the value associated with the specified - // successor. - void setSuccessorValue(unsigned idx, ConstantInt* SuccessorValue) { - assert(idx < getNumSuccessors() && "Successor # out of range!"); - setOperand(idx*2, reinterpret_cast(SuccessorValue)); - } - // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SwitchInst *) { return true; } static inline bool classof(const Instruction *I) { @@ -2889,42 +3061,6 @@ InvokeInst::InvokeInst(Value *Func, DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value) -//===----------------------------------------------------------------------===// -// UnwindInst Class -//===----------------------------------------------------------------------===// - -//===--------------------------------------------------------------------------- -/// UnwindInst - Immediately exit the current function, unwinding the stack -/// until an invoke instruction is found. 
-/// -class UnwindInst : public TerminatorInst { - void *operator new(size_t, unsigned); // DO NOT IMPLEMENT -protected: - virtual UnwindInst *clone_impl() const; -public: - // allocate space for exactly zero operands - void *operator new(size_t s) { - return User::operator new(s, 0); - } - explicit UnwindInst(LLVMContext &C, Instruction *InsertBefore = 0); - explicit UnwindInst(LLVMContext &C, BasicBlock *InsertAtEnd); - - unsigned getNumSuccessors() const { return 0; } - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const UnwindInst *) { return true; } - static inline bool classof(const Instruction *I) { - return I->getOpcode() == Instruction::Unwind; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } -private: - virtual BasicBlock *getSuccessorV(unsigned idx) const; - virtual unsigned getNumSuccessorsV() const; - virtual void setSuccessorV(unsigned idx, BasicBlock *B); -}; - //===----------------------------------------------------------------------===// // ResumeInst Class //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/include/llvm/IntrinsicInst.h b/contrib/llvm/include/llvm/IntrinsicInst.h index 42862011ac7a..1cebdd2ee642 100644 --- a/contrib/llvm/include/llvm/IntrinsicInst.h +++ b/contrib/llvm/include/llvm/IntrinsicInst.h @@ -277,34 +277,6 @@ namespace llvm { } }; - /// EHExceptionInst - This represents the llvm.eh.exception instruction. - /// - class EHExceptionInst : public IntrinsicInst { - public: - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const EHExceptionInst *) { return true; } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::eh_exception; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } - }; - - /// EHSelectorInst - This represents the llvm.eh.selector instruction. 
- /// - class EHSelectorInst : public IntrinsicInst { - public: - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const EHSelectorInst *) { return true; } - static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::eh_selector; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } - }; - } #endif diff --git a/contrib/llvm/include/llvm/Intrinsics.td b/contrib/llvm/include/llvm/Intrinsics.td index d70f9153fd8a..069f907d4ff2 100644 --- a/contrib/llvm/include/llvm/Intrinsics.td +++ b/contrib/llvm/include/llvm/Intrinsics.td @@ -284,8 +284,8 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, let Properties = [IntrNoMem] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; + def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; } //===------------------------ Debugger Intrinsics -------------------------===// @@ -304,10 +304,6 @@ let Properties = [IntrNoMem] in { //===------------------ Exception Handling Intrinsics----------------------===// // -def int_eh_exception : Intrinsic<[llvm_ptr_ty], [], [IntrReadMem]>; -def int_eh_selector : Intrinsic<[llvm_i32_ty], - [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>; -def int_eh_resume : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [Throws]>; // The result of eh.typeid.for depends on the enclosing function, but inside a // given function it is 'const' and may be CSE'd etc. @@ -326,7 +322,6 @@ let Properties = [IntrNoMem] in { def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>; } def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>; -def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty]>; def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>; @@ -443,6 +438,6 @@ include "llvm/IntrinsicsPowerPC.td" include "llvm/IntrinsicsX86.td" include "llvm/IntrinsicsARM.td" include "llvm/IntrinsicsCellSPU.td" -include "llvm/IntrinsicsAlpha.td" include "llvm/IntrinsicsXCore.td" include "llvm/IntrinsicsPTX.td" +include "llvm/IntrinsicsHexagon.td" diff --git a/contrib/llvm/include/llvm/IntrinsicsAlpha.td b/contrib/llvm/include/llvm/IntrinsicsAlpha.td deleted file mode 100644 index 59865cf8a3e6..000000000000 --- a/contrib/llvm/include/llvm/IntrinsicsAlpha.td +++ /dev/null @@ -1,18 +0,0 @@ -//===- IntrinsicsAlpha.td - Defines Alpha intrinsics -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the Alpha-specific intrinsics. -// -//===----------------------------------------------------------------------===// - - -let TargetPrefix = "alpha" in { // All intrinsics start with "llvm.alpha.". 
- def int_alpha_umulh : GCCBuiltin<"__builtin_alpha_umulh">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; -} diff --git a/contrib/llvm/include/llvm/IntrinsicsHexagon.td b/contrib/llvm/include/llvm/IntrinsicsHexagon.td new file mode 100644 index 000000000000..eb5dc8fb1e7f --- /dev/null +++ b/contrib/llvm/include/llvm/IntrinsicsHexagon.td @@ -0,0 +1,3671 @@ +//===- IntrinsicsHexagon.td - Defines Hexagon intrinsics ---*- tablegen -*-===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the Hexagon-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Definitions for all Hexagon intrinsics. +// +// All Hexagon intrinsics start with "llvm.hexagon.". +let TargetPrefix = "hexagon" in { + /// Hexagon_Intrinsic - Base class for all altivec intrinsics. + class Hexagon_Intrinsic ret_types, + list param_types, + list properties> + : GCCBuiltin, + Intrinsic; +} + +//===----------------------------------------------------------------------===// +// +// DEF_FUNCTION_TYPE_1(QI_ftype_MEM,BT_BOOL,BT_PTR) -> +// Hexagon_qi_mem_Intrinsic +// +class Hexagon_qi_mem_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) -> +// Hexagon_void_si_Intrinsic +// +class Hexagon_void_si_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) -> +// Hexagon_hi_si_Intrinsic +// +class Hexagon_hi_si_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(SI_ftype_SI,BT_INT,BT_INT) -> +// Hexagon_si_si_Intrinsic +// +class Hexagon_si_si_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(DI_ftype_SI,BT_LONGLONG,BT_INT) -> +// Hexagon_di_si_Intrinsic +// +class Hexagon_di_si_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(SI_ftype_DI,BT_INT,BT_LONGLONG) -> +// Hexagon_si_di_Intrinsic +// +class Hexagon_si_di_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(DI_ftype_DI,BT_LONGLONG,BT_LONGLONG) -> +// Hexagon_di_di_Intrinsic +// +class Hexagon_di_di_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(QI_ftype_QI,BT_BOOL,BT_BOOL) -> +// Hexagon_qi_qi_Intrinsic +// +class Hexagon_qi_qi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(QI_ftype_SI,BT_BOOL,BT_INT) -> +// Hexagon_qi_si_Intrinsic +// +class Hexagon_qi_si_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(DI_ftype_QI,BT_LONGLONG,BT_BOOL) -> +// Hexagon_di_qi_Intrinsic +// +class Hexagon_di_qi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_1(SI_ftype_QI,BT_INT,BT_BOOL) -> +// Hexagon_si_qi_Intrinsic +// +class Hexagon_si_qi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(QI_ftype_SISI,BT_BOOL,BT_INT,BT_INT) -> +// Hexagon_qi_sisi_Intrinsic +// +class Hexagon_qi_sisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(void_ftype_SISI,BT_VOID,BT_INT,BT_INT) -> +// Hexagon_void_sisi_Intrinsic +// +class Hexagon_void_sisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(SI_ftype_SISI,BT_INT,BT_INT,BT_INT) -> +// Hexagon_si_sisi_Intrinsic +// +class Hexagon_si_sisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(USI_ftype_SISI,BT_UINT,BT_INT,BT_INT) -> +// Hexagon_usi_sisi_Intrinsic +// +class 
Hexagon_usi_sisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(DI_ftype_SISI,BT_LONGLONG,BT_INT,BT_INT) -> +// Hexagon_di_sisi_Intrinsic +// +class Hexagon_di_sisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(UDI_ftype_SISI,BT_ULONGLONG,BT_INT,BT_INT) -> +// Hexagon_udi_sisi_Intrinsic +// +class Hexagon_udi_sisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(DI_ftype_SIDI,BT_LONGLONG,BT_INT,BT_LONGLONG) -> +// Hexagon_di_sidi_Intrinsic +// +class Hexagon_di_sidi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(DI_ftype_DISI,BT_LONGLONG,BT_LONGLONG,BT_INT) -> +// Hexagon_di_disi_Intrinsic +// +class Hexagon_di_disi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(SI_ftype_SIDI,BT_INT,BT_INT,BT_LONGLONG) -> +// Hexagon_si_sidi_Intrinsic +// +class Hexagon_si_sidi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(SI_ftype_DIDI,BT_INT,BT_LONGLONG,BT_LONGLONG) -> +// Hexagon_si_didi_Intrinsic +// +class Hexagon_si_didi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(DI_ftype_DIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG) -> +// Hexagon_di_didi_Intrinsic +// +class Hexagon_di_didi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(UDI_ftype_DIDI,BT_ULONGLONG,BT_LONGLONG,BT_LONGLONG) -> +// Hexagon_udi_didi_Intrinsic +// +class Hexagon_udi_didi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(SI_ftype_DISI,BT_INT,BT_LONGLONG,BT_INT) -> +// Hexagon_si_disi_Intrinsic +// +class Hexagon_si_disi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(QI_ftype_DIDI,BT_BOOL,BT_LONGLONG,BT_LONGLONG) -> +// Hexagon_qi_didi_Intrinsic +// +class Hexagon_qi_didi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(QI_ftype_QIQI,BT_BOOL,BT_BOOL,BT_BOOL) -> +// Hexagon_qi_qiqi_Intrinsic +// +class Hexagon_qi_qiqi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(QI_ftype_QIQIQI,BT_BOOL,BT_BOOL,BT_BOOL) -> +// Hexagon_qi_qiqiqi_Intrinsic +// +class Hexagon_qi_qiqiqi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(SI_ftype_QIQI,BT_INT,BT_BOOL,BT_BOOL) -> +// Hexagon_si_qiqi_Intrinsic +// +class Hexagon_si_qiqi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_2(SI_ftype_QISI,BT_INT,BT_BOOL,BT_INT) -> +// Hexagon_si_qisi_Intrinsic +// +class Hexagon_si_qisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(void_ftype_SISISI,BT_VOID,BT_INT,BT_INT,BT_INT) -> +// Hexagon_void_sisisi_Intrinsic +// +class Hexagon_void_sisisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(SI_ftype_SISISI,BT_INT,BT_INT,BT_INT,BT_INT) -> +// Hexagon_si_sisisi_Intrinsic +// +class Hexagon_si_sisisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_SISISI,BT_LONGLONG,BT_INT,BT_INT,BT_INT) -> +// Hexagon_di_sisisi_Intrinsic +// +class Hexagon_di_sisisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(SI_ftype_DISISI,BT_INT,BT_LONGLONG,BT_INT,BT_INT) -> +// Hexagon_si_disisi_Intrinsic +// +class Hexagon_si_disisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_DISISI,BT_LONGLONG,BT_LONGLONG,BT_INT,BT_INT) -> +// Hexagon_di_disisi_Intrinsic +// +class Hexagon_di_disisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(SI_ftype_SIDISI,BT_INT,BT_INT,BT_LONGLONG,BT_INT) -> +// Hexagon_si_sidisi_Intrinsic +// +class Hexagon_si_sidisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_DIDISI,BT_LONGLONG,BT_LONGLONG, +// BT_LONGLONG,BT_INT) -> +// Hexagon_di_didisi_Intrinsic +// +class 
Hexagon_di_didisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(SI_ftype_SIDIDI,BT_INT,BT_INT,BT_LONGLONG,BT_LONGLONG) -> +// Hexagon_si_sididi_Intrinsic +// +class Hexagon_si_sididi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG, +// BT_LONGLONG) -> +// Hexagon_di_dididi_Intrinsic +// +class Hexagon_di_dididi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(SI_ftype_SISIDI,BT_INT,BT_INT,BT_INT,BT_LONGLONG) -> +// Hexagon_si_sisidi_Intrinsic +// +class Hexagon_si_sisidi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(SI_ftype_QISISI,BT_INT,BT_BOOL,BT_INT,BT_INT) -> +// Hexagon_si_qisisi_Intrinsic +// +class Hexagon_si_qisisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_QISISI,BT_LONGLONG,BT_BOOL,BT_INT,BT_INT) -> +// Hexagon_di_qisisi_Intrinsic +// +class Hexagon_di_qisisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_QIDIDI,BT_LONGLONG,BT_BOOL,BT_LONGLONG, +// BT_LONGLONG) -> +// Hexagon_di_qididi_Intrinsic +// +class Hexagon_di_qididi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIQI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG, +// BT_BOOL) -> +// Hexagon_di_didiqi_Intrinsic +// +class Hexagon_di_didiqi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_4(SI_ftype_SISISISI,BT_INT,BT_INT,BT_INT,BT_INT,BT_INT) -> +// Hexagon_si_sisisisi_Intrinsic +// +class Hexagon_si_sisisisi_Intrinsic + : Hexagon_Intrinsic; +// +// DEF_FUNCTION_TYPE_4(DI_ftype_DIDISISI,BT_LONGLONG,BT_LONGLONG, +// BT_LONGLONG,BT_INT,BT_INT) -> +// Hexagon_di_didisisi_Intrinsic +// +class Hexagon_di_didisisi_Intrinsic + : Hexagon_Intrinsic; + +// +// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpeq : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeq">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgt,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpgt : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgt">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgtu,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpgtu : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtu">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpeqp,QI_ftype_DIDI,2) +// +def int_hexagon_C2_cmpeqp : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpeqp">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgtp,QI_ftype_DIDI,2) +// +def int_hexagon_C2_cmpgtp : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtp">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgtup,QI_ftype_DIDI,2) +// +def int_hexagon_C2_cmpgtup : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtup">; +// +// BUILTIN_INFO(HEXAGON.C2_bitsset,QI_ftype_SISI,2) +// +def int_hexagon_C2_bitsset : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsset">; +// +// BUILTIN_INFO(HEXAGON.C2_bitsclr,QI_ftype_SISI,2) +// +def int_hexagon_C2_bitsclr : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclr">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpeqi,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpeqi : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeqi">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgti,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpgti : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgti">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgtui,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpgtui : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtui">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgei,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpgei : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgei">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpgeui,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpgeui : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgeui">; +// +// 
BUILTIN_INFO(HEXAGON.C2_cmplt,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmplt : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmplt">; +// +// BUILTIN_INFO(HEXAGON.C2_cmpltu,QI_ftype_SISI,2) +// +def int_hexagon_C2_cmpltu : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpltu">; +// +// BUILTIN_INFO(HEXAGON.C2_bitsclri,QI_ftype_SISI,2) +// +def int_hexagon_C2_bitsclri : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclri">; +// +// BUILTIN_INFO(HEXAGON.C2_and,QI_ftype_QIQI,2) +// +def int_hexagon_C2_and : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.and">; +// +// BUILTIN_INFO(HEXAGON.C2_or,QI_ftype_QIQI,2) +// +def int_hexagon_C2_or : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.or">; +// +// BUILTIN_INFO(HEXAGON.C2_xor,QI_ftype_QIQI,2) +// +def int_hexagon_C2_xor : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.xor">; +// +// BUILTIN_INFO(HEXAGON.C2_andn,QI_ftype_QIQI,2) +// +def int_hexagon_C2_andn : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.andn">; +// +// BUILTIN_INFO(HEXAGON.C2_not,QI_ftype_QI,1) +// +def int_hexagon_C2_not : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.not">; +// +// BUILTIN_INFO(HEXAGON.C2_orn,QI_ftype_QIQI,2) +// +def int_hexagon_C2_orn : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.orn">; +// +// BUILTIN_INFO(HEXAGON.C2_pxfer_map,QI_ftype_QI,1) +// +def int_hexagon_C2_pxfer_map : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.pxfer.map">; +// +// BUILTIN_INFO(HEXAGON.C2_any8,QI_ftype_QI,1) +// +def int_hexagon_C2_any8 : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.any8">; +// +// BUILTIN_INFO(HEXAGON.C2_all8,QI_ftype_QI,1) +// +def int_hexagon_C2_all8 : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.all8">; +// +// BUILTIN_INFO(HEXAGON.C2_vitpack,SI_ftype_QIQI,2) +// +def int_hexagon_C2_vitpack : Hexagon_si_qiqi_Intrinsic<"HEXAGON.C2.vitpack">; +// +// BUILTIN_INFO(HEXAGON.C2_mux,SI_ftype_QISISI,3) +// +def int_hexagon_C2_mux : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.mux">; +// +// BUILTIN_INFO(HEXAGON.C2_muxii,SI_ftype_QISISI,3) +// +def int_hexagon_C2_muxii : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxii">; +// +// BUILTIN_INFO(HEXAGON.C2_muxir,SI_ftype_QISISI,3) +// +def int_hexagon_C2_muxir : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxir">; +// +// BUILTIN_INFO(HEXAGON.C2_muxri,SI_ftype_QISISI,3) +// +def int_hexagon_C2_muxri : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxri">; +// +// BUILTIN_INFO(HEXAGON.C2_vmux,DI_ftype_QIDIDI,3) +// +def int_hexagon_C2_vmux : Hexagon_di_qididi_Intrinsic<"HEXAGON.C2.vmux">; +// +// BUILTIN_INFO(HEXAGON.C2_mask,DI_ftype_QI,1) +// +def int_hexagon_C2_mask : Hexagon_di_qi_Intrinsic<"HEXAGON.C2.mask">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmpbeq,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmpbeq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbeq">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmpbgtu,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmpbgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbgtu">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmpheq,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmpheq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpheq">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmphgt,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmphgt : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgt">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmphgtu,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmphgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgtu">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmpweq,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmpweq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpweq">; +// +// BUILTIN_INFO(HEXAGON.A2_vcmpwgt,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmpwgt : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgt">; +// +// 
BUILTIN_INFO(HEXAGON.A2_vcmpwgtu,QI_ftype_DIDI,2) +// +def int_hexagon_A2_vcmpwgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgtu">; +// +// BUILTIN_INFO(HEXAGON.C2_tfrpr,SI_ftype_QI,1) +// +def int_hexagon_C2_tfrpr : Hexagon_si_qi_Intrinsic<"HEXAGON.C2.tfrpr">; +// +// BUILTIN_INFO(HEXAGON.C2_tfrrp,QI_ftype_SI,1) +// +def int_hexagon_C2_tfrrp : Hexagon_qi_si_Intrinsic<"HEXAGON.C2.tfrrp">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_hh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_hh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_hl_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_hl_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_lh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_lh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_ll_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_ll_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_hh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_hh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_hl_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_hl_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_lh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_lh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_ll_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_ll_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_hh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_hh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_hl_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s0">; +// +// 
BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_hl_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_lh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_lh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_ll_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_acc_sat_ll_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_hh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_hh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_hl_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_hl_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_lh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_lh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_ll_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpy_nac_sat_ll_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_hh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_hh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_hl_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_hl_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_lh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_lh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_ll_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_ll_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_hh_s0 : 
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_hh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_hl_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_hl_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_lh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_lh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_ll_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_ll_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_hh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_hh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_hl_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_hl_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_lh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_lh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_ll_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_rnd_ll_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_hh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_hh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_hl_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_hl_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_lh_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_lh_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s0,SI_ftype_SISI,2) +// +def 
int_hexagon_M2_mpy_sat_rnd_ll_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_sat_rnd_ll_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_hh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_hh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_hl_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_hl_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_lh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_lh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_ll_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_acc_ll_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_hh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_hh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_hl_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_hl_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_lh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_lh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_ll_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyd_nac_ll_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_hh_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_hh_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_hl_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_hl_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s1">; +// +// 
BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_lh_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_lh_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_ll_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_ll_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_hh_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_hh_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_hl_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_hl_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_lh_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_lh_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_ll_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_mpyd_rnd_ll_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_hh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_hh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_hl_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_hl_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_lh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_lh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_ll_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_acc_ll_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_hh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_hh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s1">; +// 
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_hl_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_hl_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_lh_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_lh_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s0,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_ll_s0 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s1,SI_ftype_SISISI,3) +// +def int_hexagon_M2_mpyu_nac_ll_s1 : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s0,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_hh_s0 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s1,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_hh_s1 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s0,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_hl_s0 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s1,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_hl_s1 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s0,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_lh_s0 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s1,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_lh_s1 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s0,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_ll_s0 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s1,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_ll_s1 : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_hh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_hh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_hl_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_hl_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_lh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_lh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_ll_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_acc_ll_s1 : 
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_hh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_hh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_hl_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_hl_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_lh_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_lh_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_ll_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_mpyud_nac_ll_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s0,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_hh_s0 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s1,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_hh_s1 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s0,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_hl_s0 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s1,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_hl_s1 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s0,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_lh_s0 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s1,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_lh_s1 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s0,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_ll_s0 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s1,UDI_ftype_SISI,2) +// +def int_hexagon_M2_mpyud_ll_s1 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mpysmi,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpysmi : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpysmi">; +// +// BUILTIN_INFO(HEXAGON.M2_macsip,SI_ftype_SISISI,3) +// +def int_hexagon_M2_macsip : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsip">; +// +// BUILTIN_INFO(HEXAGON.M2_macsin,SI_ftype_SISISI,3) +// +def int_hexagon_M2_macsin : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsin">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyss_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_dpmpyss_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyss_acc_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_dpmpyss_acc_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.acc.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyss_nac_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_dpmpyss_nac_s0 : 
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.nac.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_s0,UDI_ftype_SISI,2) +// +def int_hexagon_M2_dpmpyuu_s0 : +Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.dpmpyuu.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_acc_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_dpmpyuu_acc_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.acc.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_nac_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_dpmpyuu_nac_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.nac.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpy_up,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpy_up : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.up">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyu_up,USI_ftype_SISI,2) +// +def int_hexagon_M2_mpyu_up : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.up">; +// +// BUILTIN_INFO(HEXAGON.M2_dpmpyss_rnd_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_dpmpyss_rnd_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.rnd.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyi,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpyi : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyi">; +// +// BUILTIN_INFO(HEXAGON.M2_mpyui,SI_ftype_SISI,2) +// +def int_hexagon_M2_mpyui : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyui">; +// +// BUILTIN_INFO(HEXAGON.M2_maci,SI_ftype_SISISI,3) +// +def int_hexagon_M2_maci : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.maci">; +// +// BUILTIN_INFO(HEXAGON.M2_acci,SI_ftype_SISISI,3) +// +def int_hexagon_M2_acci : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.acci">; +// +// BUILTIN_INFO(HEXAGON.M2_accii,SI_ftype_SISISI,3) +// +def int_hexagon_M2_accii : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.accii">; +// +// BUILTIN_INFO(HEXAGON.M2_nacci,SI_ftype_SISISI,3) +// +def int_hexagon_M2_nacci : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.nacci">; +// +// BUILTIN_INFO(HEXAGON.M2_naccii,SI_ftype_SISISI,3) +// +def int_hexagon_M2_naccii : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.naccii">; +// +// BUILTIN_INFO(HEXAGON.M2_subacc,SI_ftype_SISISI,3) +// +def int_hexagon_M2_subacc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.subacc">; +// +// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_vmpy2s_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_vmpy2s_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vmac2s_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_vmac2s_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vmac2s_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_vmac2s_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0pack,SI_ftype_SISI,2) +// +def int_hexagon_M2_vmpy2s_s0pack : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0pack">; +// +// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1pack,SI_ftype_SISI,2) +// +def int_hexagon_M2_vmpy2s_s1pack : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1pack">; +// +// BUILTIN_INFO(HEXAGON.M2_vmac2,DI_ftype_DISISI,3) +// +def int_hexagon_M2_vmac2 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2">; +// +// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vmpy2es_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vmpy2es_s1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vmac2es_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vmac2es_s0 : 
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vmac2es_s1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vmac2es_s1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vmac2es,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vmac2es : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es">; +// +// BUILTIN_INFO(HEXAGON.M2_vrmac_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vrmac_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrmac.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vrmpy_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vrmpy_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrmpy.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s0,SI_ftype_DIDI,2) +// +def int_hexagon_M2_vdmpyrs_s0 : +Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s1,SI_ftype_DIDI,2) +// +def int_hexagon_M2_vdmpyrs_s1 : +Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vdmacs_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vdmacs_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vdmacs_s1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vdmacs_s1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vdmpys_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vdmpys_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vdmpys_s1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vdmpys_s1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_cmpyrs_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_cmpyrs_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s0,SI_ftype_SISI,2) +// +def int_hexagon_M2_cmpyrsc_s0 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s1,SI_ftype_SISI,2) +// +def int_hexagon_M2_cmpyrsc_s1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cmacs_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cmacs_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmacs_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cmacs_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cmacsc_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cmacsc_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmacsc_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cmacsc_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpys_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_cmpys_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpys_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_cmpys_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpysc_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_cmpysc_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpysc_s1,DI_ftype_SISI,2) +// +def int_hexagon_M2_cmpysc_s1 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cnacs_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cnacs_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cnacs_s1,DI_ftype_DISISI,3) +// +def 
int_hexagon_M2_cnacs_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_cnacsc_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cnacsc_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cnacsc_s1,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cnacsc_s1 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1,DI_ftype_DISI,2) +// +def int_hexagon_M2_vrcmpys_s1 : +Hexagon_di_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpys_acc_s1,DI_ftype_DIDISI,3) +// +def int_hexagon_M2_vrcmpys_acc_s1 : +Hexagon_di_didisi_Intrinsic<"HEXAGON.M2.vrcmpys.acc.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1rp,SI_ftype_DISI,2) +// +def int_hexagon_M2_vrcmpys_s1rp : +Hexagon_si_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1rp">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacls_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacls_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacls_s1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacls_s1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmachs_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmachs_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmachs_s1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmachs_s1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyl_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyl_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyl_s1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyl_s1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyh_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyh_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyh_s1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyh_s1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacls_rs0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacls_rs0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacls_rs1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacls_rs1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmachs_rs0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmachs_rs0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmachs_rs1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmachs_rs1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyl_rs0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyl_rs1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyh_rs0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyh_rs1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_hmmpyl_rs1,SI_ftype_SISI,2) +// +def int_hexagon_M2_hmmpyl_rs1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyl.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_hmmpyh_rs1,SI_ftype_SISI,2) +// +def int_hexagon_M2_hmmpyh_rs1 : +Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyh.rs1">; +// +// 
BUILTIN_INFO(HEXAGON.M2_mmaculs_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmaculs_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmaculs_s1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmaculs_s1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacuhs_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacuhs_s1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyul_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyul_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyul_s1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyul_s1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyuh_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyuh_s1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmaculs_rs0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmaculs_rs1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacuhs_rs0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs1,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_mmacuhs_rs1 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyul_rs0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyul_rs1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyuh_rs0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs0">; +// +// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs1,DI_ftype_DIDI,2) +// +def int_hexagon_M2_mmpyuh_rs1 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs1">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vrcmaci_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vrcmacr_s0 : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0c,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vrcmaci_s0c : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0c">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0c,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vrcmacr_s0c : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0c">; +// +// BUILTIN_INFO(HEXAGON.M2_cmaci_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cmaci_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmaci.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmacr_s0,DI_ftype_DISISI,3) +// +def int_hexagon_M2_cmacr_s0 : +Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacr.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vrcmpyi_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0,DI_ftype_DIDI,2) 
+// +def int_hexagon_M2_vrcmpyr_s0 : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0c,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vrcmpyi_s0c : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0c">; +// +// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0c,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vrcmpyr_s0c : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0c">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpyi_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_cmpyi_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyi.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_cmpyr_s0,DI_ftype_SISI,2) +// +def int_hexagon_M2_cmpyr_s0 : +Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyr.s0">; +// +// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_i,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vcmpy_s0_sat_i : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.i">; +// +// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_r,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vcmpy_s0_sat_r : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.r">; +// +// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_i,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vcmpy_s1_sat_i : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.i">; +// +// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_r,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vcmpy_s1_sat_r : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.r">; +// +// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_i,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vcmac_s0_sat_i : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.i">; +// +// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_r,DI_ftype_DIDIDI,3) +// +def int_hexagon_M2_vcmac_s0_sat_r : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.r">; +// +// BUILTIN_INFO(HEXAGON.S2_vcrotate,DI_ftype_DISI,2) +// +def int_hexagon_S2_vcrotate : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.vcrotate">; +// +// BUILTIN_INFO(HEXAGON.A2_add,SI_ftype_SISI,2) +// +def int_hexagon_A2_add : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.add">; +// +// BUILTIN_INFO(HEXAGON.A2_sub,SI_ftype_SISI,2) +// +def int_hexagon_A2_sub : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.sub">; +// +// BUILTIN_INFO(HEXAGON.A2_addsat,SI_ftype_SISI,2) +// +def int_hexagon_A2_addsat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addsat">; +// +// BUILTIN_INFO(HEXAGON.A2_subsat,SI_ftype_SISI,2) +// +def int_hexagon_A2_subsat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subsat">; +// +// BUILTIN_INFO(HEXAGON.A2_addi,SI_ftype_SISI,2) +// +def int_hexagon_A2_addi : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addi">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_l16_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_l16_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_l16_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_l16_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hl">; +def int_hexagon_A2_addh_l16_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.lh">; +def int_hexagon_A2_addh_l16_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_l16_sat_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_l16_sat_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hl">; +def int_hexagon_A2_addh_l16_sat_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.lh">; +def int_hexagon_A2_addh_l16_sat_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_l16_ll,SI_ftype_SISI,2) +// +def 
int_hexagon_A2_subh_l16_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_l16_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_l16_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_l16_sat_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_l16_sat_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_lh,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.lh">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_hh,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_sat_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_lh,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_sat_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.lh">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_sat_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hh,SI_ftype_SISI,2) +// +def int_hexagon_A2_addh_h16_sat_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_lh,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.lh">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_hh,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_sat_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_lh,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_sat_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.lh">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_sat_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hh,SI_ftype_SISI,2) +// +def int_hexagon_A2_subh_h16_sat_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_aslh,SI_ftype_SI,1) +// +def int_hexagon_A2_aslh : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.aslh">; +// +// BUILTIN_INFO(HEXAGON.A2_asrh,SI_ftype_SI,1) +// +def int_hexagon_A2_asrh : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.asrh">; +// +// BUILTIN_INFO(HEXAGON.A2_addp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_addp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addp">; +// +// BUILTIN_INFO(HEXAGON.A2_addpsat,DI_ftype_DIDI,2) 
+// +def int_hexagon_A2_addpsat : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addpsat">; +// +// BUILTIN_INFO(HEXAGON.A2_addsp,DI_ftype_SIDI,2) +// +def int_hexagon_A2_addsp : +Hexagon_di_sidi_Intrinsic<"HEXAGON.A2.addsp">; +// +// BUILTIN_INFO(HEXAGON.A2_subp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_subp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.subp">; +// +// BUILTIN_INFO(HEXAGON.A2_neg,SI_ftype_SI,1) +// +def int_hexagon_A2_neg : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.neg">; +// +// BUILTIN_INFO(HEXAGON.A2_negsat,SI_ftype_SI,1) +// +def int_hexagon_A2_negsat : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.negsat">; +// +// BUILTIN_INFO(HEXAGON.A2_abs,SI_ftype_SI,1) +// +def int_hexagon_A2_abs : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.abs">; +// +// BUILTIN_INFO(HEXAGON.A2_abssat,SI_ftype_SI,1) +// +def int_hexagon_A2_abssat : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.abssat">; +// +// BUILTIN_INFO(HEXAGON.A2_vconj,DI_ftype_DI,1) +// +def int_hexagon_A2_vconj : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.vconj">; +// +// BUILTIN_INFO(HEXAGON.A2_negp,DI_ftype_DI,1) +// +def int_hexagon_A2_negp : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.negp">; +// +// BUILTIN_INFO(HEXAGON.A2_absp,DI_ftype_DI,1) +// +def int_hexagon_A2_absp : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.absp">; +// +// BUILTIN_INFO(HEXAGON.A2_max,SI_ftype_SISI,2) +// +def int_hexagon_A2_max : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.max">; +// +// BUILTIN_INFO(HEXAGON.A2_maxu,USI_ftype_SISI,2) +// +def int_hexagon_A2_maxu : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.A2.maxu">; +// +// BUILTIN_INFO(HEXAGON.A2_min,SI_ftype_SISI,2) +// +def int_hexagon_A2_min : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.min">; +// +// BUILTIN_INFO(HEXAGON.A2_minu,USI_ftype_SISI,2) +// +def int_hexagon_A2_minu : +Hexagon_usi_sisi_Intrinsic<"HEXAGON.A2.minu">; +// +// BUILTIN_INFO(HEXAGON.A2_maxp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_maxp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.maxp">; +// +// BUILTIN_INFO(HEXAGON.A2_maxup,UDI_ftype_DIDI,2) +// +def int_hexagon_A2_maxup : +Hexagon_udi_didi_Intrinsic<"HEXAGON.A2.maxup">; +// +// BUILTIN_INFO(HEXAGON.A2_minp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_minp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.minp">; +// +// BUILTIN_INFO(HEXAGON.A2_minup,UDI_ftype_DIDI,2) +// +def int_hexagon_A2_minup : +Hexagon_udi_didi_Intrinsic<"HEXAGON.A2.minup">; +// +// BUILTIN_INFO(HEXAGON.A2_tfr,SI_ftype_SI,1) +// +def int_hexagon_A2_tfr : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfr">; +// +// BUILTIN_INFO(HEXAGON.A2_tfrsi,SI_ftype_SI,1) +// +def int_hexagon_A2_tfrsi : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfrsi">; +// +// BUILTIN_INFO(HEXAGON.A2_tfrp,DI_ftype_DI,1) +// +def int_hexagon_A2_tfrp : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.tfrp">; +// +// BUILTIN_INFO(HEXAGON.A2_tfrpi,DI_ftype_SI,1) +// +def int_hexagon_A2_tfrpi : +Hexagon_di_si_Intrinsic<"HEXAGON.A2.tfrpi">; +// +// BUILTIN_INFO(HEXAGON.A2_zxtb,SI_ftype_SI,1) +// +def int_hexagon_A2_zxtb : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxtb">; +// +// BUILTIN_INFO(HEXAGON.A2_sxtb,SI_ftype_SI,1) +// +def int_hexagon_A2_sxtb : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxtb">; +// +// BUILTIN_INFO(HEXAGON.A2_zxth,SI_ftype_SI,1) +// +def int_hexagon_A2_zxth : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxth">; +// +// BUILTIN_INFO(HEXAGON.A2_sxth,SI_ftype_SI,1) +// +def int_hexagon_A2_sxth : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxth">; +// +// BUILTIN_INFO(HEXAGON.A2_combinew,DI_ftype_SISI,2) +// +def int_hexagon_A2_combinew : +Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combinew">; +// +// BUILTIN_INFO(HEXAGON.A2_combineii,DI_ftype_SISI,2) +// 
+def int_hexagon_A2_combineii : +Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combineii">; +// +// BUILTIN_INFO(HEXAGON.A2_combine_hh,SI_ftype_SISI,2) +// +def int_hexagon_A2_combine_hh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hh">; +// +// BUILTIN_INFO(HEXAGON.A2_combine_hl,SI_ftype_SISI,2) +// +def int_hexagon_A2_combine_hl : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hl">; +// +// BUILTIN_INFO(HEXAGON.A2_combine_lh,SI_ftype_SISI,2) +// +def int_hexagon_A2_combine_lh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.lh">; +// +// BUILTIN_INFO(HEXAGON.A2_combine_ll,SI_ftype_SISI,2) +// +def int_hexagon_A2_combine_ll : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.ll">; +// +// BUILTIN_INFO(HEXAGON.A2_tfril,SI_ftype_SISI,2) +// +def int_hexagon_A2_tfril : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfril">; +// +// BUILTIN_INFO(HEXAGON.A2_tfrih,SI_ftype_SISI,2) +// +def int_hexagon_A2_tfrih : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfrih">; +// +// BUILTIN_INFO(HEXAGON.A2_and,SI_ftype_SISI,2) +// +def int_hexagon_A2_and : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.and">; +// +// BUILTIN_INFO(HEXAGON.A2_or,SI_ftype_SISI,2) +// +def int_hexagon_A2_or : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.or">; +// +// BUILTIN_INFO(HEXAGON.A2_xor,SI_ftype_SISI,2) +// +def int_hexagon_A2_xor : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.xor">; +// +// BUILTIN_INFO(HEXAGON.A2_not,SI_ftype_SI,1) +// +def int_hexagon_A2_not : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.not">; +// +// BUILTIN_INFO(HEXAGON.M2_xor_xacc,SI_ftype_SISISI,3) +// +def int_hexagon_M2_xor_xacc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.xor.xacc">; +// +// BUILTIN_INFO(HEXAGON.A2_subri,SI_ftype_SISI,2) +// +def int_hexagon_A2_subri : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subri">; +// +// BUILTIN_INFO(HEXAGON.A2_andir,SI_ftype_SISI,2) +// +def int_hexagon_A2_andir : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.andir">; +// +// BUILTIN_INFO(HEXAGON.A2_orir,SI_ftype_SISI,2) +// +def int_hexagon_A2_orir : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.orir">; +// +// BUILTIN_INFO(HEXAGON.A2_andp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_andp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.andp">; +// +// BUILTIN_INFO(HEXAGON.A2_orp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_orp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.orp">; +// +// BUILTIN_INFO(HEXAGON.A2_xorp,DI_ftype_DIDI,2) +// +def int_hexagon_A2_xorp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.xorp">; +// +// BUILTIN_INFO(HEXAGON.A2_notp,DI_ftype_DI,1) +// +def int_hexagon_A2_notp : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.notp">; +// +// BUILTIN_INFO(HEXAGON.A2_sxtw,DI_ftype_SI,1) +// +def int_hexagon_A2_sxtw : +Hexagon_di_si_Intrinsic<"HEXAGON.A2.sxtw">; +// +// BUILTIN_INFO(HEXAGON.A2_sat,SI_ftype_DI,1) +// +def int_hexagon_A2_sat : +Hexagon_si_di_Intrinsic<"HEXAGON.A2.sat">; +// +// BUILTIN_INFO(HEXAGON.A2_sath,SI_ftype_SI,1) +// +def int_hexagon_A2_sath : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.sath">; +// +// BUILTIN_INFO(HEXAGON.A2_satuh,SI_ftype_SI,1) +// +def int_hexagon_A2_satuh : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.satuh">; +// +// BUILTIN_INFO(HEXAGON.A2_satub,SI_ftype_SI,1) +// +def int_hexagon_A2_satub : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.satub">; +// +// BUILTIN_INFO(HEXAGON.A2_satb,SI_ftype_SI,1) +// +def int_hexagon_A2_satb : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.satb">; +// +// BUILTIN_INFO(HEXAGON.A2_vaddub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vaddub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddub">; +// +// BUILTIN_INFO(HEXAGON.A2_vaddubs,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vaddubs : 
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddubs">; +// +// BUILTIN_INFO(HEXAGON.A2_vaddh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vaddh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddh">; +// +// BUILTIN_INFO(HEXAGON.A2_vaddhs,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vaddhs : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddhs">; +// +// BUILTIN_INFO(HEXAGON.A2_vadduhs,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vadduhs : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vadduhs">; +// +// BUILTIN_INFO(HEXAGON.A2_vaddw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vaddw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddw">; +// +// BUILTIN_INFO(HEXAGON.A2_vaddws,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vaddws : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddws">; +// +// BUILTIN_INFO(HEXAGON.A2_svavgh,SI_ftype_SISI,2) +// +def int_hexagon_A2_svavgh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavgh">; +// +// BUILTIN_INFO(HEXAGON.A2_svavghs,SI_ftype_SISI,2) +// +def int_hexagon_A2_svavghs : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavghs">; +// +// BUILTIN_INFO(HEXAGON.A2_svnavgh,SI_ftype_SISI,2) +// +def int_hexagon_A2_svnavgh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svnavgh">; +// +// BUILTIN_INFO(HEXAGON.A2_svaddh,SI_ftype_SISI,2) +// +def int_hexagon_A2_svaddh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddh">; +// +// BUILTIN_INFO(HEXAGON.A2_svaddhs,SI_ftype_SISI,2) +// +def int_hexagon_A2_svaddhs : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddhs">; +// +// BUILTIN_INFO(HEXAGON.A2_svadduhs,SI_ftype_SISI,2) +// +def int_hexagon_A2_svadduhs : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svadduhs">; +// +// BUILTIN_INFO(HEXAGON.A2_svsubh,SI_ftype_SISI,2) +// +def int_hexagon_A2_svsubh : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubh">; +// +// BUILTIN_INFO(HEXAGON.A2_svsubhs,SI_ftype_SISI,2) +// +def int_hexagon_A2_svsubhs : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubhs">; +// +// BUILTIN_INFO(HEXAGON.A2_svsubuhs,SI_ftype_SISI,2) +// +def int_hexagon_A2_svsubuhs : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubuhs">; +// +// BUILTIN_INFO(HEXAGON.A2_vraddub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vraddub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vraddub">; +// +// BUILTIN_INFO(HEXAGON.A2_vraddub_acc,DI_ftype_DIDIDI,3) +// +def int_hexagon_A2_vraddub_acc : +Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vraddub.acc">; +// +// BUILTIN_INFO(HEXAGON.M2_vradduh,SI_ftype_DIDI,2) +// +def int_hexagon_M2_vradduh : +Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vradduh">; +// +// BUILTIN_INFO(HEXAGON.A2_vsubub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsubub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubub">; +// +// BUILTIN_INFO(HEXAGON.A2_vsububs,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsububs : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsububs">; +// +// BUILTIN_INFO(HEXAGON.A2_vsubh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsubh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubh">; +// +// BUILTIN_INFO(HEXAGON.A2_vsubhs,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsubhs : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubhs">; +// +// BUILTIN_INFO(HEXAGON.A2_vsubuhs,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsubuhs : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubuhs">; +// +// BUILTIN_INFO(HEXAGON.A2_vsubw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsubw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubw">; +// +// BUILTIN_INFO(HEXAGON.A2_vsubws,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vsubws : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubws">; +// +// BUILTIN_INFO(HEXAGON.A2_vabsh,DI_ftype_DI,1) +// +def int_hexagon_A2_vabsh : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsh">; +// +// 
BUILTIN_INFO(HEXAGON.A2_vabshsat,DI_ftype_DI,1) +// +def int_hexagon_A2_vabshsat : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabshsat">; +// +// BUILTIN_INFO(HEXAGON.A2_vabsw,DI_ftype_DI,1) +// +def int_hexagon_A2_vabsw : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsw">; +// +// BUILTIN_INFO(HEXAGON.A2_vabswsat,DI_ftype_DI,1) +// +def int_hexagon_A2_vabswsat : +Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabswsat">; +// +// BUILTIN_INFO(HEXAGON.M2_vabsdiffw,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vabsdiffw : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffw">; +// +// BUILTIN_INFO(HEXAGON.M2_vabsdiffh,DI_ftype_DIDI,2) +// +def int_hexagon_M2_vabsdiffh : +Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffh">; +// +// BUILTIN_INFO(HEXAGON.A2_vrsadub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vrsadub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vrsadub">; +// +// BUILTIN_INFO(HEXAGON.A2_vrsadub_acc,DI_ftype_DIDIDI,3) +// +def int_hexagon_A2_vrsadub_acc : +Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vrsadub.acc">; +// +// BUILTIN_INFO(HEXAGON.A2_vavgub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavgub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgub">; +// +// BUILTIN_INFO(HEXAGON.A2_vavguh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavguh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguh">; +// +// BUILTIN_INFO(HEXAGON.A2_vavgh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavgh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgh">; +// +// BUILTIN_INFO(HEXAGON.A2_vnavgh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vnavgh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgh">; +// +// BUILTIN_INFO(HEXAGON.A2_vavgw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavgw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgw">; +// +// BUILTIN_INFO(HEXAGON.A2_vnavgw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vnavgw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgw">; +// +// BUILTIN_INFO(HEXAGON.A2_vavgwr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavgwr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwr">; +// +// BUILTIN_INFO(HEXAGON.A2_vnavgwr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vnavgwr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwr">; +// +// BUILTIN_INFO(HEXAGON.A2_vavgwcr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavgwcr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwcr">; +// +// BUILTIN_INFO(HEXAGON.A2_vnavgwcr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vnavgwcr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwcr">; +// +// BUILTIN_INFO(HEXAGON.A2_vavghcr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavghcr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghcr">; +// +// BUILTIN_INFO(HEXAGON.A2_vnavghcr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vnavghcr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghcr">; +// +// BUILTIN_INFO(HEXAGON.A2_vavguw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavguw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguw">; +// +// BUILTIN_INFO(HEXAGON.A2_vavguwr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavguwr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguwr">; +// +// BUILTIN_INFO(HEXAGON.A2_vavgubr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavgubr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgubr">; +// +// BUILTIN_INFO(HEXAGON.A2_vavguhr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavguhr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguhr">; +// +// BUILTIN_INFO(HEXAGON.A2_vavghr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vavghr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghr">; +// +// BUILTIN_INFO(HEXAGON.A2_vnavghr,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vnavghr : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghr">; +// +// 
BUILTIN_INFO(HEXAGON.A2_vminh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vminh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminh">; +// +// BUILTIN_INFO(HEXAGON.A2_vmaxh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vmaxh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxh">; +// +// BUILTIN_INFO(HEXAGON.A2_vminub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vminub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminub">; +// +// BUILTIN_INFO(HEXAGON.A2_vmaxub,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vmaxub : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxub">; +// +// BUILTIN_INFO(HEXAGON.A2_vminuh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vminuh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuh">; +// +// BUILTIN_INFO(HEXAGON.A2_vmaxuh,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vmaxuh : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuh">; +// +// BUILTIN_INFO(HEXAGON.A2_vminw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vminw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminw">; +// +// BUILTIN_INFO(HEXAGON.A2_vmaxw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vmaxw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxw">; +// +// BUILTIN_INFO(HEXAGON.A2_vminuw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vminuw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuw">; +// +// BUILTIN_INFO(HEXAGON.A2_vmaxuw,DI_ftype_DIDI,2) +// +def int_hexagon_A2_vmaxuw : +Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuw">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_asr_r_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_asl_r_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_lsr_r_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.r.r">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_lsl_r_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsl.r.r">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_asr_r_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.p">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_asl_r_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.p">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsr_r_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.p">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsl_r_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.p">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_r_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_r_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_r_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsl_r_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_r_p_acc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_r_p_acc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_r_p_acc : 
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsl_r_p_acc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_r_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_r_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_r_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsl_r_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_r_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_r_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_r_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsl_r_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_r_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_r_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_r_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsl_r_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_r_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_r_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_r_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsl_r_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_r_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_r_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_r_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsl_r_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_r_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.or">; +// +// 
BUILTIN_INFO(HEXAGON.S2_asl_r_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_r_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.or">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_r_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.or">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsl_r_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_r_sat,SI_ftype_SISI,2) +// +def int_hexagon_S2_asr_r_r_sat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r.sat">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_r_sat,SI_ftype_SISI,2) +// +def int_hexagon_S2_asl_r_r_sat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r.sat">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_asr_i_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_lsr_i_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.i.r">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_asl_i_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_asr_i_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.p">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsr_i_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.p">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_p,DI_ftype_DISI,2) +// +def int_hexagon_S2_asl_i_p : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.p">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_i_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_i_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r_acc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_i_r_acc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_i_p_acc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_i_p_acc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_p_acc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_i_p_acc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.acc">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_i_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_i_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r_nac,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_i_r_nac : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_i_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_i_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_p_nac,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_i_p_nac : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.nac">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_xacc,SI_ftype_SISISI,3) 
+// +def int_hexagon_S2_lsr_i_r_xacc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.xacc">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r_xacc,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_i_r_xacc : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.xacc">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_xacc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_i_p_xacc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.xacc">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_p_xacc,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_i_p_xacc : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.xacc">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_i_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_i_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r_and,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_i_r_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asr_i_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_lsr_i_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r_or,SI_ftype_SISISI,3) +// +def int_hexagon_S2_asl_i_r_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_i_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_i_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_p_and,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_i_p_and : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.and">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asr_i_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.or">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_lsr_i_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_p_or,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_asl_i_p_or : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.or">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_r_sat,SI_ftype_SISI,2) +// +def int_hexagon_S2_asl_i_r_sat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r.sat">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd,SI_ftype_SISI,2) +// +def int_hexagon_S2_asr_i_r_rnd : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd_goodsyntax,SI_ftype_SISI,2) +// +def int_hexagon_S2_asr_i_r_rnd_goodsyntax : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd.goodsyntax">; +// +// BUILTIN_INFO(HEXAGON.S2_addasl_rrri,SI_ftype_SISISI,3) +// +def int_hexagon_S2_addasl_rrri : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.addasl.rrri">; +// +// BUILTIN_INFO(HEXAGON.S2_valignib,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_valignib : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.valignib">; +// +// BUILTIN_INFO(HEXAGON.S2_valignrb,DI_ftype_DIDIQI,3) +// +def int_hexagon_S2_valignrb : +Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.valignrb">; +// +// BUILTIN_INFO(HEXAGON.S2_vspliceib,DI_ftype_DIDISI,3) +// +def int_hexagon_S2_vspliceib : +Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.vspliceib">; +// +// 
BUILTIN_INFO(HEXAGON.S2_vsplicerb,DI_ftype_DIDIQI,3) +// +def int_hexagon_S2_vsplicerb : +Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.vsplicerb">; +// +// BUILTIN_INFO(HEXAGON.S2_vsplatrh,DI_ftype_SI,1) +// +def int_hexagon_S2_vsplatrh : +Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsplatrh">; +// +// BUILTIN_INFO(HEXAGON.S2_vsplatrb,SI_ftype_SI,1) +// +def int_hexagon_S2_vsplatrb : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.vsplatrb">; +// +// BUILTIN_INFO(HEXAGON.S2_insert,SI_ftype_SISISISI,4) +// +def int_hexagon_S2_insert : +Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.insert">; +// +// BUILTIN_INFO(HEXAGON.S2_tableidxb_goodsyntax,SI_ftype_SISISISI,4) +// +def int_hexagon_S2_tableidxb_goodsyntax : +Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxb.goodsyntax">; +// +// BUILTIN_INFO(HEXAGON.S2_tableidxh_goodsyntax,SI_ftype_SISISISI,4) +// +def int_hexagon_S2_tableidxh_goodsyntax : +Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxh.goodsyntax">; +// +// BUILTIN_INFO(HEXAGON.S2_tableidxw_goodsyntax,SI_ftype_SISISISI,4) +// +def int_hexagon_S2_tableidxw_goodsyntax : +Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxw.goodsyntax">; +// +// BUILTIN_INFO(HEXAGON.S2_tableidxd_goodsyntax,SI_ftype_SISISISI,4) +// +def int_hexagon_S2_tableidxd_goodsyntax : +Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxd.goodsyntax">; +// +// BUILTIN_INFO(HEXAGON.S2_extractu,SI_ftype_SISISI,3) +// +def int_hexagon_S2_extractu : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.extractu">; +// +// BUILTIN_INFO(HEXAGON.S2_insertp,DI_ftype_DIDISISI,4) +// +def int_hexagon_S2_insertp : +Hexagon_di_didisisi_Intrinsic<"HEXAGON.S2.insertp">; +// +// BUILTIN_INFO(HEXAGON.S2_extractup,DI_ftype_DISISI,3) +// +def int_hexagon_S2_extractup : +Hexagon_di_disisi_Intrinsic<"HEXAGON.S2.extractup">; +// +// BUILTIN_INFO(HEXAGON.S2_insert_rp,SI_ftype_SISIDI,3) +// +def int_hexagon_S2_insert_rp : +Hexagon_si_sisidi_Intrinsic<"HEXAGON.S2.insert.rp">; +// +// BUILTIN_INFO(HEXAGON.S2_extractu_rp,SI_ftype_SIDI,2) +// +def int_hexagon_S2_extractu_rp : +Hexagon_si_sidi_Intrinsic<"HEXAGON.S2.extractu.rp">; +// +// BUILTIN_INFO(HEXAGON.S2_insertp_rp,DI_ftype_DIDIDI,3) +// +def int_hexagon_S2_insertp_rp : +Hexagon_di_dididi_Intrinsic<"HEXAGON.S2.insertp.rp">; +// +// BUILTIN_INFO(HEXAGON.S2_extractup_rp,DI_ftype_DIDI,2) +// +def int_hexagon_S2_extractup_rp : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.extractup.rp">; +// +// BUILTIN_INFO(HEXAGON.S2_tstbit_i,QI_ftype_SISI,2) +// +def int_hexagon_S2_tstbit_i : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.i">; +// +// BUILTIN_INFO(HEXAGON.S2_setbit_i,SI_ftype_SISI,2) +// +def int_hexagon_S2_setbit_i : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.i">; +// +// BUILTIN_INFO(HEXAGON.S2_togglebit_i,SI_ftype_SISI,2) +// +def int_hexagon_S2_togglebit_i : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.i">; +// +// BUILTIN_INFO(HEXAGON.S2_clrbit_i,SI_ftype_SISI,2) +// +def int_hexagon_S2_clrbit_i : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.i">; +// +// BUILTIN_INFO(HEXAGON.S2_tstbit_r,QI_ftype_SISI,2) +// +def int_hexagon_S2_tstbit_r : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.r">; +// +// BUILTIN_INFO(HEXAGON.S2_setbit_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_setbit_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.r">; +// +// BUILTIN_INFO(HEXAGON.S2_togglebit_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_togglebit_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.r">; +// +// BUILTIN_INFO(HEXAGON.S2_clrbit_r,SI_ftype_SISI,2) +// +def int_hexagon_S2_clrbit_r : +Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.r">; 
+// +// BUILTIN_INFO(HEXAGON.S2_asr_i_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_asr_i_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsr_i_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_asl_i_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_asr_r_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_asl_r_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsr_r_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_vh,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsl_r_vh : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vh">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_asr_i_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_i_svw_trun,SI_ftype_DISI,2) +// +def int_hexagon_S2_asr_i_svw_trun : +Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.i.svw.trun">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_svw_trun,SI_ftype_DISI,2) +// +def int_hexagon_S2_asr_r_svw_trun : +Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.r.svw.trun">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_i_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsr_i_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_i_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_asl_i_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_asr_r_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_asr_r_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_asl_r_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_asl_r_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_lsr_r_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsr_r_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_lsl_r_vw,DI_ftype_DISI,2) +// +def int_hexagon_S2_lsl_r_vw : +Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vw">; +// +// BUILTIN_INFO(HEXAGON.S2_vrndpackwh,SI_ftype_DI,1) +// +def int_hexagon_S2_vrndpackwh : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwh">; +// +// BUILTIN_INFO(HEXAGON.S2_vrndpackwhs,SI_ftype_DI,1) +// +def int_hexagon_S2_vrndpackwhs : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwhs">; +// +// BUILTIN_INFO(HEXAGON.S2_vsxtbh,DI_ftype_SI,1) +// +def int_hexagon_S2_vsxtbh : +Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxtbh">; +// +// BUILTIN_INFO(HEXAGON.S2_vzxtbh,DI_ftype_SI,1) +// +def int_hexagon_S2_vzxtbh : +Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxtbh">; +// +// BUILTIN_INFO(HEXAGON.S2_vsathub,SI_ftype_DI,1) +// +def int_hexagon_S2_vsathub : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathub">; +// +// BUILTIN_INFO(HEXAGON.S2_svsathub,SI_ftype_SI,1) +// +def int_hexagon_S2_svsathub : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathub">; +// +// BUILTIN_INFO(HEXAGON.S2_svsathb,SI_ftype_SI,1) +// +def int_hexagon_S2_svsathb : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathb">; +// +// BUILTIN_INFO(HEXAGON.S2_vsathb,SI_ftype_DI,1) +// +def int_hexagon_S2_vsathb : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathb">; +// +// BUILTIN_INFO(HEXAGON.S2_vtrunohb,SI_ftype_DI,1) +// +def int_hexagon_S2_vtrunohb : 
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunohb">; +// +// BUILTIN_INFO(HEXAGON.S2_vtrunewh,DI_ftype_DIDI,2) +// +def int_hexagon_S2_vtrunewh : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunewh">; +// +// BUILTIN_INFO(HEXAGON.S2_vtrunowh,DI_ftype_DIDI,2) +// +def int_hexagon_S2_vtrunowh : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunowh">; +// +// BUILTIN_INFO(HEXAGON.S2_vtrunehb,SI_ftype_DI,1) +// +def int_hexagon_S2_vtrunehb : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunehb">; +// +// BUILTIN_INFO(HEXAGON.S2_vsxthw,DI_ftype_SI,1) +// +def int_hexagon_S2_vsxthw : +Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxthw">; +// +// BUILTIN_INFO(HEXAGON.S2_vzxthw,DI_ftype_SI,1) +// +def int_hexagon_S2_vzxthw : +Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxthw">; +// +// BUILTIN_INFO(HEXAGON.S2_vsatwh,SI_ftype_DI,1) +// +def int_hexagon_S2_vsatwh : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwh">; +// +// BUILTIN_INFO(HEXAGON.S2_vsatwuh,SI_ftype_DI,1) +// +def int_hexagon_S2_vsatwuh : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwuh">; +// +// BUILTIN_INFO(HEXAGON.S2_packhl,DI_ftype_SISI,2) +// +def int_hexagon_S2_packhl : +Hexagon_di_sisi_Intrinsic<"HEXAGON.S2.packhl">; +// +// BUILTIN_INFO(HEXAGON.A2_swiz,SI_ftype_SI,1) +// +def int_hexagon_A2_swiz : +Hexagon_si_si_Intrinsic<"HEXAGON.A2.swiz">; +// +// BUILTIN_INFO(HEXAGON.S2_vsathub_nopack,DI_ftype_DI,1) +// +def int_hexagon_S2_vsathub_nopack : +Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathub.nopack">; +// +// BUILTIN_INFO(HEXAGON.S2_vsathb_nopack,DI_ftype_DI,1) +// +def int_hexagon_S2_vsathb_nopack : +Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathb.nopack">; +// +// BUILTIN_INFO(HEXAGON.S2_vsatwh_nopack,DI_ftype_DI,1) +// +def int_hexagon_S2_vsatwh_nopack : +Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwh.nopack">; +// +// BUILTIN_INFO(HEXAGON.S2_vsatwuh_nopack,DI_ftype_DI,1) +// +def int_hexagon_S2_vsatwuh_nopack : +Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwuh.nopack">; +// +// BUILTIN_INFO(HEXAGON.S2_shuffob,DI_ftype_DIDI,2) +// +def int_hexagon_S2_shuffob : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffob">; +// +// BUILTIN_INFO(HEXAGON.S2_shuffeb,DI_ftype_DIDI,2) +// +def int_hexagon_S2_shuffeb : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeb">; +// +// BUILTIN_INFO(HEXAGON.S2_shuffoh,DI_ftype_DIDI,2) +// +def int_hexagon_S2_shuffoh : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffoh">; +// +// BUILTIN_INFO(HEXAGON.S2_shuffeh,DI_ftype_DIDI,2) +// +def int_hexagon_S2_shuffeh : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeh">; +// +// BUILTIN_INFO(HEXAGON.S2_parityp,SI_ftype_DIDI,2) +// +def int_hexagon_S2_parityp : +Hexagon_si_didi_Intrinsic<"HEXAGON.S2.parityp">; +// +// BUILTIN_INFO(HEXAGON.S2_lfsp,DI_ftype_DIDI,2) +// +def int_hexagon_S2_lfsp : +Hexagon_di_didi_Intrinsic<"HEXAGON.S2.lfsp">; +// +// BUILTIN_INFO(HEXAGON.S2_clbnorm,SI_ftype_SI,1) +// +def int_hexagon_S2_clbnorm : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.clbnorm">; +// +// BUILTIN_INFO(HEXAGON.S2_clb,SI_ftype_SI,1) +// +def int_hexagon_S2_clb : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.clb">; +// +// BUILTIN_INFO(HEXAGON.S2_cl0,SI_ftype_SI,1) +// +def int_hexagon_S2_cl0 : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl0">; +// +// BUILTIN_INFO(HEXAGON.S2_cl1,SI_ftype_SI,1) +// +def int_hexagon_S2_cl1 : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl1">; +// +// BUILTIN_INFO(HEXAGON.S2_clbp,SI_ftype_DI,1) +// +def int_hexagon_S2_clbp : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.clbp">; +// +// BUILTIN_INFO(HEXAGON.S2_cl0p,SI_ftype_DI,1) +// +def int_hexagon_S2_cl0p : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl0p">; +// +// 
BUILTIN_INFO(HEXAGON.S2_cl1p,SI_ftype_DI,1) +// +def int_hexagon_S2_cl1p : +Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl1p">; +// +// BUILTIN_INFO(HEXAGON.S2_brev,SI_ftype_SI,1) +// +def int_hexagon_S2_brev : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.brev">; +// +// BUILTIN_INFO(HEXAGON.S2_ct0,SI_ftype_SI,1) +// +def int_hexagon_S2_ct0 : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct0">; +// +// BUILTIN_INFO(HEXAGON.S2_ct1,SI_ftype_SI,1) +// +def int_hexagon_S2_ct1 : +Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct1">; +// +// BUILTIN_INFO(HEXAGON.S2_interleave,DI_ftype_DI,1) +// +def int_hexagon_S2_interleave : +Hexagon_di_di_Intrinsic<"HEXAGON.S2.interleave">; +// +// BUILTIN_INFO(HEXAGON.S2_deinterleave,DI_ftype_DI,1) +// +def int_hexagon_S2_deinterleave : +Hexagon_di_di_Intrinsic<"HEXAGON.S2.deinterleave">; + +// +// BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1) +// +def int_hexagon_SI_to_SXTHI_asrh : +Hexagon_si_si_Intrinsic<"SI.to.SXTHI.asrh">; + +// +// BUILTIN_INFO(HEXAGON.A4_orn,SI_ftype_SISI,2) +// +def int_hexagon_A4_orn : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.orn">; +// +// BUILTIN_INFO(HEXAGON.A4_andn,SI_ftype_SISI,2) +// +def int_hexagon_A4_andn : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.andn">; +// +// BUILTIN_INFO(HEXAGON.A4_orn,DI_ftype_DIDI,2) +// +def int_hexagon_A4_ornp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A4.ornp">; +// +// BUILTIN_INFO(HEXAGON.A4_andn,DI_ftype_DIDI,2) +// +def int_hexagon_A4_andnp : +Hexagon_di_didi_Intrinsic<"HEXAGON.A4.andnp">; +// +// BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_sisi,2) +// +def int_hexagon_A4_combineir : +Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineir">; +// +// BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_sisi,2) +// +def int_hexagon_A4_combineri : +Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineri">; +// +// BUILTIN_INFO(HEXAGON.C4_cmpneq,QI_ftype_SISI,2) +// +def int_hexagon_C4_cmpneq : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneq">; +// +// BUILTIN_INFO(HEXAGON.C4_cmpneqi,QI_ftype_SISI,2) +// +def int_hexagon_C4_cmpneqi : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneqi">; +// +// BUILTIN_INFO(HEXAGON.C4_cmplte,QI_ftype_SISI,2) +// +def int_hexagon_C4_cmplte : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplte">; +// +// BUILTIN_INFO(HEXAGON.C4_cmpltei,QI_ftype_SISI,2) +// +def int_hexagon_C4_cmpltei : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpltei">; +// +// BUILTIN_INFO(HEXAGON.C4_cmplteu,QI_ftype_SISI,2) +// +def int_hexagon_C4_cmplteu : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteu">; +// +// BUILTIN_INFO(HEXAGON.C4_cmplteui,QI_ftype_SISI,2) +// +def int_hexagon_C4_cmplteui : +Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteui">; +// +// BUILTIN_INFO(HEXAGON.A4_rcmpneq,SI_ftype_SISI,2) +// +def int_hexagon_A4_rcmpneq : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneq">; +// +// BUILTIN_INFO(HEXAGON.A4_rcmpneqi,SI_ftype_SISI,2) +// +def int_hexagon_A4_rcmpneqi : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneqi">; +// +// BUILTIN_INFO(HEXAGON.A4_rcmpeq,SI_ftype_SISI,2) +// +def int_hexagon_A4_rcmpeq : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeq">; +// +// BUILTIN_INFO(HEXAGON.A4_rcmpeqi,SI_ftype_SISI,2) +// +def int_hexagon_A4_rcmpeqi : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeqi">; +// +// BUILTIN_INFO(HEXAGON.C4_fastcorner9,QI_ftype_QIQI,2) +// +def int_hexagon_C4_fastcorner9 : +Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9">; +// +// BUILTIN_INFO(HEXAGON.C4_fastcorner9_not,QI_ftype_QIQI,2) +// +def int_hexagon_C4_fastcorner9_not : +Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9_not">; +// +// BUILTIN_INFO(HEXAGON.C4_and_andn,QI_ftype_QIQIQI,3) +// +def 
int_hexagon_C4_and_andn : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_andn">; +// +// BUILTIN_INFO(HEXAGON.C4_and_and,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_and_and : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_and">; +// +// BUILTIN_INFO(HEXAGON.C4_and_orn,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_and_orn : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_orn">; +// +// BUILTIN_INFO(HEXAGON.C4_and_or,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_and_or : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_or">; +// +// BUILTIN_INFO(HEXAGON.C4_or_andn,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_or_andn : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_andn">; +// +// BUILTIN_INFO(HEXAGON.C4_or_and,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_or_and : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_and">; +// +// BUILTIN_INFO(HEXAGON.C4_or_orn,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_or_orn : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_orn">; +// +// BUILTIN_INFO(HEXAGON.C4_or_or,QI_ftype_QIQIQI,3) +// +def int_hexagon_C4_or_or : +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_or">; +// +// BUILTIN_INFO(HEXAGON.S4_addaddi,SI_ftype_SISISI,3) +// +def int_hexagon_S4_addaddi : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.addaddi">; +// +// BUILTIN_INFO(HEXAGON.S4_subaddi,SI_ftype_SISISI,3) +// +def int_hexagon_S4_subaddi : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.subaddi">; +// +// BUILTIN_INFO(HEXAGON.S4_andnp,DI_ftype_DIDI,2) +// +def int_hexagon_S4_andnp : +Hexagon_di_didi_Intrinsic<"HEXAGON.S4.andnp">; +// +// BUILTIN_INFO(HEXAGON.S4_ornp,DI_ftype_DIDI,2) +// +def int_hexagon_S4_ornp : +Hexagon_di_didi_Intrinsic<"HEXAGON.S4.ornp">; +// +// BUILTIN_INFO(HEXAGON.M4_xor_xacc,DI_ftype_DIDIDI,3) +// +def int_hexagon_M4_xor_xacc : +Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.xor_xacc">; +// +// BUILTIN_INFO(HEXAGON.M4_and_and,SI_ftype_SISISI,3) +// +def int_hexagon_M4_and_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_and">; +// +// BUILTIN_INFO(HEXAGON.M4_and_andn,SI_ftype_SISISI,3) +// +def int_hexagon_M4_and_andn : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_andn">; +// +// BUILTIN_INFO(HEXAGON.M4_and_or,SI_ftype_SISISI,3) +// +def int_hexagon_M4_and_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_or">; +// +// BUILTIN_INFO(HEXAGON.M4_and_xor,SI_ftype_SISISI,3) +// +def int_hexagon_M4_and_xor : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_xor">; +// +// BUILTIN_INFO(HEXAGON.M4_xor_and,SI_ftype_SISISI,3) +// +def int_hexagon_M4_xor_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_or">; +// +// BUILTIN_INFO(HEXAGON.M4_xor_or,SI_ftype_SISISI,3) +// +def int_hexagon_M4_xor_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_and">; +// +// BUILTIN_INFO(HEXAGON.M4_xor_andn,SI_ftype_SISISI,3) +// +def int_hexagon_M4_xor_andn : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_andn">; +// +// BUILTIN_INFO(HEXAGON.M4_or_and,SI_ftype_SISISI,3) +// +def int_hexagon_M4_or_and : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_and">; +// +// BUILTIN_INFO(HEXAGON.M4_or_or,SI_ftype_SISISI,3) +// +def int_hexagon_M4_or_or : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_or">; +// +// BUILTIN_INFO(HEXAGON.M4_or_xor,SI_ftype_SISISI,3) +// +def int_hexagon_M4_or_xor : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_xor">; +// +// BUILTIN_INFO(HEXAGON.M4_or_andn,SI_ftype_SISISI,3) +// +def int_hexagon_M4_or_andn : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_andn">; +// +// BUILTIN_INFO(HEXAGON.S4_or_andix,SI_ftype_SISISI,3) +// +def int_hexagon_S4_or_andix : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_andix">; +// +// 
BUILTIN_INFO(HEXAGON.S4_or_andi,SI_ftype_SISISI,3) +// +def int_hexagon_S4_or_andi : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_andi">; +// +// BUILTIN_INFO(HEXAGON.S4_or_ori,SI_ftype_SISISI,3) +// +def int_hexagon_S4_or_ori : +Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_ori">; +// +// BUILTIN_INFO(HEXAGON.A4_modwrapu,SI_ftype_SISI,2) +// +def int_hexagon_A4_modwrapu : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.modwrapu">; +// +// BUILTIN_INFO(HEXAGON.A4_cround_ri,SI_ftype_SISI,2) +// +def int_hexagon_A4_cround_ri : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround_ri">; +// +// BUILTIN_INFO(HEXAGON.A4_cround_rr,SI_ftype_SISI,2) +// +def int_hexagon_A4_cround_rr : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround_rr">; +// +// BUILTIN_INFO(HEXAGON.A4_round_ri,SI_ftype_SISI,2) +// +def int_hexagon_A4_round_ri : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_ri">; +// +// BUILTIN_INFO(HEXAGON.A4_round_rr,SI_ftype_SISI,2) +// +def int_hexagon_A4_round_rr : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_rr">; +// +// BUILTIN_INFO(HEXAGON.A4_round_ri_sat,SI_ftype_SISI,2) +// +def int_hexagon_A4_round_ri_sat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_ri_sat">; +// +// BUILTIN_INFO(HEXAGON.A4_round_rr_sat,SI_ftype_SISI,2) +// +def int_hexagon_A4_round_rr_sat : +Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_rr_sat">; diff --git a/contrib/llvm/include/llvm/IntrinsicsX86.td b/contrib/llvm/include/llvm/IntrinsicsX86.td index d445a010cefd..a6fda4a3afc7 100644 --- a/contrib/llvm/include/llvm/IntrinsicsX86.td +++ b/contrib/llvm/include/llvm/IntrinsicsX86.td @@ -145,10 +145,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Comparison ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse_cmp_ss : + def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_sse_cmp_ps : + def int_x86_sse_cmp_ps : GCCBuiltin<"__builtin_ia32_cmpps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">, @@ -281,10 +281,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // FP comparison ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse2_cmp_sd : + def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_sse2_cmp_pd : + def int_x86_sse2_cmp_pd : GCCBuiltin<"__builtin_ia32_cmppd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">, @@ -452,28 +452,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_i32_ty], [IntrNoMem]>; } -// Integer comparison ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_sse2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_sse2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_sse2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_sse2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, - llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_sse2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_sse2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; -} - // Conversion ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">, @@ -627,8 +605,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, @@ -655,8 +633,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], [IntrNoMem]>; def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; } // Packed multiply high with round and scale @@ -792,12 +770,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector compare, min, max let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem, Commutative]>; - def int_x86_sse42_pcmpgtq : GCCBuiltin<"__builtin_ia32_pcmpgtq">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem, Commutative]>; @@ -919,7 +891,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector sum of absolute differences let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty], + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty], [IntrNoMem, Commutative]>; } @@ -932,13 +904,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Test instruction with bitwise comparison. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } @@ -1120,17 +1092,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_pd : GCCBuiltin<"__builtin_ia32_vpermilpd">, + def int_x86_avx_vpermil_pd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_ps : GCCBuiltin<"__builtin_ia32_vpermilps">, + def int_x86_avx_vpermil_ps : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_pd_256 : GCCBuiltin<"__builtin_ia32_vpermilpd256">, + def int_x86_avx_vpermil_pd_256 : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx_vpermil_ps_256 : GCCBuiltin<"__builtin_ia32_vpermilps256">, + def int_x86_avx_vpermil_ps_256 : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; } @@ -1281,13 +1253,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_vbroadcastss : + def int_x86_avx_vbroadcast_ss : GCCBuiltin<"__builtin_ia32_vbroadcastss">, Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_avx_vbroadcast_sd_256 : GCCBuiltin<"__builtin_ia32_vbroadcastsd256">, Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_avx_vbroadcastss_256 : + def int_x86_avx_vbroadcast_ss_256 : GCCBuiltin<"__builtin_ia32_vbroadcastss256">, Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_avx_vbroadcastf128_pd_256 : @@ -1300,12 +1272,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // SIMD load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx_loadu_pd_256 : GCCBuiltin<"__builtin_ia32_loadupd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_avx_loadu_ps_256 : GCCBuiltin<"__builtin_ia32_loadups256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_avx_loadu_dq_256 : GCCBuiltin<"__builtin_ia32_loaddqu256">, - Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">, Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; } @@ -1360,6 +1326,1046 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8f32_ty, llvm_v8f32_ty], []>; } +//===----------------------------------------------------------------------===// +// AVX2 + +// Integer arithmetic ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; +} + +// Vector min, max +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem, Commutative]>; +} + +// Integer shift ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Pack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; +} + +// Absolute value ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +} + +// Horizontal arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; +} + +// Sign ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty], [IntrNoMem]>; +} + +// Packed multiply high with round and scale +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v16i16_ty], [IntrNoMem, Commutative]>; +} + +// Vector sign and zero extend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +} + +// Vector blend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Vector load with broadcast +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
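(Similarly, the sign/zero-extension and blend intrinsics above correspond to wrappers such as _mm256_cvtepi8_epi32 and _mm256_blendv_epi8; a hedged sketch with illustrative names, again assuming -mavx2.)

#include <immintrin.h>

__m256i
extend_blend_demo(__m128i bytes, __m256i other, __m256i mask)
{
        /* pmovsxbd256: sign-extend the low eight bytes to 32-bit lanes. */
        __m256i wide = _mm256_cvtepi8_epi32(bytes);
        /* pblendvb256: take each byte from other where the mask byte MSB is set, else from wide. */
        return (_mm256_blendv_epi8(wide, other, mask));
}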
+ def int_x86_avx2_vbroadcast_ss_ps : + GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx2_vbroadcast_sd_pd_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx2_vbroadcast_ss_ps_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx2_vbroadcasti128 : + GCCBuiltin<"__builtin_ia32_vbroadcastsi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx2_pbroadcastb_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastb_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastw_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastw_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastd_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastd_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastq_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx2_pbroadcastq_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; +} + +// Vector permutation +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + def int_x86_avx2_permq : GCCBuiltin<"__builtin_ia32_permdi256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_avx2_permpd : GCCBuiltin<"__builtin_ia32_permdf256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; +} + +// Vector extract and insert +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; +} + +// Conditional load ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
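(The broadcast and cross-lane permute intrinsics above also surface as C-level wrappers; a brief sketch with illustrative names, -mavx2 assumed.)

#include <immintrin.h>

__m256i
broadcast_permute_demo(__m128i x, __m256i idx)
{
        /* pbroadcastd256: replicate the low 32-bit element into all eight lanes. */
        __m256i v = _mm256_broadcastd_epi32(x);
        /* permd: reorder the 32-bit lanes of v using the indices in idx. */
        return (_mm256_permutevar8x32_epi32(v, idx));
}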
+ def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem]>; + def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">, + Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem]>; + def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">, + Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem]>; + def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem]>; +} + +// Conditional store ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], []>; + def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">, + Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_avx2_maskstore_d_256 : + GCCBuiltin<"__builtin_ia32_maskstored256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], []>; + def int_x86_avx2_maskstore_q_256 : + GCCBuiltin<"__builtin_ia32_maskstoreq256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>; +} + +// Variable bit shift ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + + def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + + def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +} + +// Misc. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
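(The conditional load/store and per-element variable shift intrinsics above round-trip through <immintrin.h> as well; a small sketch under the same assumptions, names illustrative only.)

#include <immintrin.h>

void
mask_shift_demo(const int *src, int *dst, __m128i mask, __m128i counts)
{
        /* maskloadd: lanes whose mask MSB is clear read back as zero. */
        __m128i v = _mm_maskload_epi32(src, mask);
        /* psllv4si: shift each 32-bit lane left by its own count. */
        v = _mm_sllv_epi32(v, counts);
        /* maskstored: only lanes whose mask MSB is set are written to dst. */
        _mm_maskstore_epi32(dst, mask, v);
}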
+ def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, + llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrNoMem, Commutative]>; + def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +//===----------------------------------------------------------------------===// +// FMA4 + +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, 
llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmaddsub_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmaddsub_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsubadd_ps_256 : + GCCBuiltin<"__builtin_ia32_vfmsubaddps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; + def int_x86_fma4_vfmsubadd_pd_256 : + GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// XOP + + def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_xop_vpermil2pd_256 : + GCCBuiltin<"__builtin_ia32_vpermil2pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpermil2ps_256 : + GCCBuiltin<"__builtin_ia32_vpermil2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_xop_vfrcz_pd : + GCCBuiltin<"__builtin_ia32_vfrczpd">, + Intrinsic<[llvm_v2f64_ty], 
[llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_ps : + GCCBuiltin<"__builtin_ia32_vfrczps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_sd : + GCCBuiltin<"__builtin_ia32_vfrczsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_xop_vfrcz_ss : + GCCBuiltin<"__builtin_ia32_vfrczss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_xop_vfrcz_pd_256 : + GCCBuiltin<"__builtin_ia32_vfrczpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_ps_256 : + GCCBuiltin<"__builtin_ia32_vfrczps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_xop_vpcmov : + GCCBuiltin<"__builtin_ia32_vpcmov">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcmov_256 : + GCCBuiltin<"__builtin_ia32_vpcmov_256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqb : + GCCBuiltin<"__builtin_ia32_vpcomeqb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqw : + GCCBuiltin<"__builtin_ia32_vpcomeqw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqd : + GCCBuiltin<"__builtin_ia32_vpcomeqd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomeqq : + GCCBuiltin<"__builtin_ia32_vpcomeqq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequb : + GCCBuiltin<"__builtin_ia32_vpcomequb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequd : + GCCBuiltin<"__builtin_ia32_vpcomequd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequq : + GCCBuiltin<"__builtin_ia32_vpcomequq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomequw : + GCCBuiltin<"__builtin_ia32_vpcomequw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseb : + GCCBuiltin<"__builtin_ia32_vpcomfalseb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalsed : + GCCBuiltin<"__builtin_ia32_vpcomfalsed">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseq : + GCCBuiltin<"__builtin_ia32_vpcomfalseq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseub : + GCCBuiltin<"__builtin_ia32_vpcomfalseub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseud : + GCCBuiltin<"__builtin_ia32_vpcomfalseud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseuq : + GCCBuiltin<"__builtin_ia32_vpcomfalseuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalseuw : + GCCBuiltin<"__builtin_ia32_vpcomfalseuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomfalsew : + GCCBuiltin<"__builtin_ia32_vpcomfalsew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeb : + GCCBuiltin<"__builtin_ia32_vpcomgeb">, + 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomged : + GCCBuiltin<"__builtin_ia32_vpcomged">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeq : + GCCBuiltin<"__builtin_ia32_vpcomgeq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeub : + GCCBuiltin<"__builtin_ia32_vpcomgeub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeud : + GCCBuiltin<"__builtin_ia32_vpcomgeud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeuq : + GCCBuiltin<"__builtin_ia32_vpcomgeuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgeuw : + GCCBuiltin<"__builtin_ia32_vpcomgeuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgew : + GCCBuiltin<"__builtin_ia32_vpcomgew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtb : + GCCBuiltin<"__builtin_ia32_vpcomgtb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtd : + GCCBuiltin<"__builtin_ia32_vpcomgtd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtq : + GCCBuiltin<"__builtin_ia32_vpcomgtq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtub : + GCCBuiltin<"__builtin_ia32_vpcomgtub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtud : + GCCBuiltin<"__builtin_ia32_vpcomgtud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtuq : + GCCBuiltin<"__builtin_ia32_vpcomgtuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtuw : + GCCBuiltin<"__builtin_ia32_vpcomgtuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomgtw : + GCCBuiltin<"__builtin_ia32_vpcomgtw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleb : + GCCBuiltin<"__builtin_ia32_vpcomleb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomled : + GCCBuiltin<"__builtin_ia32_vpcomled">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleq : + GCCBuiltin<"__builtin_ia32_vpcomleq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleub : + GCCBuiltin<"__builtin_ia32_vpcomleub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleud : + GCCBuiltin<"__builtin_ia32_vpcomleud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleuq : + GCCBuiltin<"__builtin_ia32_vpcomleuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomleuw : + GCCBuiltin<"__builtin_ia32_vpcomleuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomlew : + GCCBuiltin<"__builtin_ia32_vpcomlew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltb : + GCCBuiltin<"__builtin_ia32_vpcomltb">, + 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltd : + GCCBuiltin<"__builtin_ia32_vpcomltd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltq : + GCCBuiltin<"__builtin_ia32_vpcomltq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltub : + GCCBuiltin<"__builtin_ia32_vpcomltub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltud : + GCCBuiltin<"__builtin_ia32_vpcomltud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltuq : + GCCBuiltin<"__builtin_ia32_vpcomltuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltuw : + GCCBuiltin<"__builtin_ia32_vpcomltuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomltw : + GCCBuiltin<"__builtin_ia32_vpcomltw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneb : + GCCBuiltin<"__builtin_ia32_vpcomneb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomned : + GCCBuiltin<"__builtin_ia32_vpcomned">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneq : + GCCBuiltin<"__builtin_ia32_vpcomneq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneub : + GCCBuiltin<"__builtin_ia32_vpcomneub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneud : + GCCBuiltin<"__builtin_ia32_vpcomneud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneuq : + GCCBuiltin<"__builtin_ia32_vpcomneuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomneuw : + GCCBuiltin<"__builtin_ia32_vpcomneuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomnew : + GCCBuiltin<"__builtin_ia32_vpcomnew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueb : + GCCBuiltin<"__builtin_ia32_vpcomtrueb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrued : + GCCBuiltin<"__builtin_ia32_vpcomtrued">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueq : + GCCBuiltin<"__builtin_ia32_vpcomtrueq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueub : + GCCBuiltin<"__builtin_ia32_vpcomtrueub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueud : + GCCBuiltin<"__builtin_ia32_vpcomtrueud">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueuq : + GCCBuiltin<"__builtin_ia32_vpcomtrueuq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtrueuw : + GCCBuiltin<"__builtin_ia32_vpcomtrueuw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpcomtruew : + GCCBuiltin<"__builtin_ia32_vpcomtruew">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vphaddbd : + 
GCCBuiltin<"__builtin_ia32_vphaddbd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddbq : + GCCBuiltin<"__builtin_ia32_vphaddbq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddbw : + GCCBuiltin<"__builtin_ia32_vphaddbw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphadddq : + GCCBuiltin<"__builtin_ia32_vphadddq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_xop_vphaddubd : + GCCBuiltin<"__builtin_ia32_vphaddubd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddubq : + GCCBuiltin<"__builtin_ia32_vphaddubq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddubw : + GCCBuiltin<"__builtin_ia32_vphaddubw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphaddudq : + GCCBuiltin<"__builtin_ia32_vphaddudq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_xop_vphadduwd : + GCCBuiltin<"__builtin_ia32_vphadduwd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphadduwq : + GCCBuiltin<"__builtin_ia32_vphadduwq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphaddwd : + GCCBuiltin<"__builtin_ia32_vphaddwd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphaddwq : + GCCBuiltin<"__builtin_ia32_vphaddwq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vphsubbw : + GCCBuiltin<"__builtin_ia32_vphsubbw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_xop_vphsubdq : + GCCBuiltin<"__builtin_ia32_vphsubdq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_xop_vphsubwd : + GCCBuiltin<"__builtin_ia32_vphsubwd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_xop_vpmacsdd : + GCCBuiltin<"__builtin_ia32_vpmacsdd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsdqh : + GCCBuiltin<"__builtin_ia32_vpmacsdqh">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsdql : + GCCBuiltin<"__builtin_ia32_vpmacsdql">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssdd : + GCCBuiltin<"__builtin_ia32_vpmacssdd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssdqh : + GCCBuiltin<"__builtin_ia32_vpmacssdqh">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssdql : + GCCBuiltin<"__builtin_ia32_vpmacssdql">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsswd : + GCCBuiltin<"__builtin_ia32_vpmacsswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacssww : + GCCBuiltin<"__builtin_ia32_vpmacssww">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacswd : + GCCBuiltin<"__builtin_ia32_vpmacswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmacsww : + GCCBuiltin<"__builtin_ia32_vpmacsww">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, 
llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpmadcsswd : + GCCBuiltin<"__builtin_ia32_vpmadcsswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpmadcswd : + GCCBuiltin<"__builtin_ia32_vpmadcswd">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpperm : + GCCBuiltin<"__builtin_ia32_vpperm">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vprotb : + GCCBuiltin<"__builtin_ia32_vprotb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vprotd : + GCCBuiltin<"__builtin_ia32_vprotd">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vprotq : + GCCBuiltin<"__builtin_ia32_vprotq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vprotw : + GCCBuiltin<"__builtin_ia32_vprotw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpshab : + GCCBuiltin<"__builtin_ia32_vpshab">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpshad : + GCCBuiltin<"__builtin_ia32_vpshad">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpshaq : + GCCBuiltin<"__builtin_ia32_vpshaq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpshaw : + GCCBuiltin<"__builtin_ia32_vpshaw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_xop_vpshlb : + GCCBuiltin<"__builtin_ia32_vpshlb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_xop_vpshld : + GCCBuiltin<"__builtin_ia32_vpshld">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_xop_vpshlq : + GCCBuiltin<"__builtin_ia32_vpshlq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_xop_vpshlw : + GCCBuiltin<"__builtin_ia32_vpshlw">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // MMX @@ -1587,13 +2593,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, - llvm_x86mmx_ty], [IntrNoMem]>; + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, - llvm_x86mmx_ty], [IntrNoMem]>; + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, - llvm_x86mmx_ty], [IntrNoMem]>; + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, @@ -1629,3 +2635,63 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } + +//===----------------------------------------------------------------------===// +// BMI + +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// FS/GS Base + +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">, + Intrinsic<[llvm_i32_ty], []>; + def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">, + Intrinsic<[llvm_i32_ty], []>; + def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">, + Intrinsic<[llvm_i64_ty], []>; + def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">, + Intrinsic<[llvm_i64_ty], []>; + def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">, + Intrinsic<[], [llvm_i32_ty]>; + def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">, + Intrinsic<[], [llvm_i32_ty]>; + def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">, + Intrinsic<[], [llvm_i64_ty]>; + def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">, + Intrinsic<[], [llvm_i64_ty]>; +} + +//===----------------------------------------------------------------------===// +// Half float conversion + +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty], + [IntrNoMem]>; +} diff --git a/contrib/llvm/include/llvm/LLVMContext.h b/contrib/llvm/include/llvm/LLVMContext.h index 65146c31aaa3..18adcd1e3c23 100644 --- a/contrib/llvm/include/llvm/LLVMContext.h +++ b/contrib/llvm/include/llvm/LLVMContext.h @@ -19,6 +19,7 @@ namespace llvm { class LLVMContextImpl; class StringRef; +class Twine; class Instruction; class Module; class SMDiagnostic; @@ -40,7 +41,9 @@ class LLVMContext { enum { MD_dbg = 0, // "dbg" MD_tbaa = 1, // "tbaa" - MD_prof = 2 // "prof" + MD_prof = 2, // "prof" + MD_fpaccuracy = 3, // "fpaccuracy" + MD_range = 4 // "range" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. 
@@ -79,9 +82,9 @@ class LLVMContext { /// be prepared to drop the erroneous construct on the floor and "not crash". /// The generated code need not be correct. The error message will be /// implicitly prefixed with "error: " and should not end with a ".". - void emitError(unsigned LocCookie, StringRef ErrorStr); - void emitError(const Instruction *I, StringRef ErrorStr); - void emitError(StringRef ErrorStr); + void emitError(unsigned LocCookie, const Twine &ErrorStr); + void emitError(const Instruction *I, const Twine &ErrorStr); + void emitError(const Twine &ErrorStr); private: // DO NOT IMPLEMENT diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h index f690d045d172..2258d45ce90a 100644 --- a/contrib/llvm/include/llvm/LinkAllPasses.h +++ b/contrib/llvm/include/llvm/LinkAllPasses.h @@ -31,6 +31,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Vectorize.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include @@ -69,7 +70,7 @@ namespace { (void) llvm::createEdgeProfilerPass(); (void) llvm::createOptimalEdgeProfilerPass(); (void) llvm::createPathProfilerPass(); - (void) llvm::createGCOVProfilerPass(true, true, false); + (void) llvm::createGCOVProfilerPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); @@ -97,6 +98,7 @@ namespace { (void) llvm::createNoAAPass(); (void) llvm::createNoProfileInfoPass(); (void) llvm::createObjCARCAliasAnalysisPass(); + (void) llvm::createObjCARCAPElimPass(); (void) llvm::createObjCARCExpandPass(); (void) llvm::createObjCARCContractPass(); (void) llvm::createObjCARCOptPass(); @@ -150,6 +152,7 @@ namespace { (void) llvm::createCorrelatedValuePropagationPass(); (void) llvm::createMemDepPrinter(); (void) llvm::createInstructionSimplifierPass(); + (void) llvm::createBBVectorizePass(); (void)new llvm::IntervalPartition(); (void)new llvm::FindUsedTypes(); diff --git a/contrib/llvm/include/llvm/Linker.h b/contrib/llvm/include/llvm/Linker.h index 88908fbd72a7..1ebcd6b53863 100644 --- a/contrib/llvm/include/llvm/Linker.h +++ b/contrib/llvm/include/llvm/Linker.h @@ -15,14 +15,15 @@ #define LLVM_LINKER_H #include +#include #include -#include "llvm/ADT/StringRef.h" namespace llvm { namespace sys { class Path; } class Module; class LLVMContext; +class StringRef; /// This class provides the core functionality of linking in LLVM. It retains a /// Module object which is the composite of the modules and libraries linked diff --git a/contrib/llvm/include/llvm/MC/MCAsmBackend.h b/contrib/llvm/include/llvm/MC/MCAsmBackend.h index 4a0cf37a6eb2..05e6286b7cc5 100644 --- a/contrib/llvm/include/llvm/MC/MCAsmBackend.h +++ b/contrib/llvm/include/llvm/MC/MCAsmBackend.h @@ -12,17 +12,20 @@ #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { +class MCAsmLayout; +class MCAssembler; class MCELFObjectTargetWriter; -class MCFixup; +struct MCFixupKindInfo; +class MCFragment; class MCInst; +class MCInstFragment; class MCObjectWriter; class MCSection; -template -class SmallVectorImpl; +class MCValue; class raw_ostream; /// MCAsmBackend - Generic interface to target specific assembler backends. 
@@ -44,8 +47,8 @@ class MCAsmBackend { /// createELFObjectTargetWriter - Create a new ELFObjectTargetWriter to enable /// non-standard ELFObjectWriters. virtual MCELFObjectTargetWriter *createELFObjectTargetWriter() const { - assert(0 && "createELFObjectTargetWriter is not supported by asm backend"); - return 0; + llvm_unreachable("createELFObjectTargetWriter is not supported by asm " + "backend"); } /// hasReliableSymbolDifference - Check whether this target implements @@ -85,12 +88,21 @@ class MCAsmBackend { /// getFixupKindInfo - Get information on a fixup kind. virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const; + /// processFixupValue - Target hook to adjust the literal value of a fixup + /// if necessary. IsResolved signals whether the caller believes a relocation + /// is needed; the target can modify the value. The default does nothing. + virtual void processFixupValue(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) {} + /// @} - /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided + /// applyFixup - Apply the \arg Value for given \arg Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. - virtual void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const = 0; /// @} @@ -98,11 +110,18 @@ class MCAsmBackend { /// @name Target Relaxation Interfaces /// @{ - /// MayNeedRelaxation - Check whether the given instruction may need + /// mayNeedRelaxation - Check whether the given instruction may need /// relaxation. /// /// \param Inst - The instruction to test. - virtual bool MayNeedRelaxation(const MCInst &Inst) const = 0; + virtual bool mayNeedRelaxation(const MCInst &Inst) const = 0; + + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const = 0; /// RelaxInstruction - Relax the instruction in the given fragment to the next /// wider instruction. @@ -110,20 +129,20 @@ class MCAsmBackend { /// \param Inst - The instruction to relax, which may be the same as the /// output. /// \parm Res [output] - On return, the relaxed instruction. - virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const = 0; + virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const = 0; /// @} - /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given + /// writeNopData - Write an (optimal) nop sequence of Count bytes to the given /// output. If the target cannot generate such a sequence, it should return an /// error. /// /// \return - True on success. - virtual bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const = 0; + virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const = 0; - /// HandleAssemblerFlag - Handle any target-specific assembler flags. + /// handleAssemblerFlag - Handle any target-specific assembler flags. /// By default, do nothing. 
- virtual void HandleAssemblerFlag(MCAssemblerFlag Flag) {} + virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {} }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfo.h b/contrib/llvm/include/llvm/MC/MCAsmInfo.h index c3c296e23dc1..0f67c993714c 100644 --- a/contrib/llvm/include/llvm/MC/MCAsmInfo.h +++ b/contrib/llvm/include/llvm/MC/MCAsmInfo.h @@ -36,10 +36,6 @@ namespace llvm { enum LCOMMType { None, NoAlignment, ByteAlignment }; } - namespace Structors { - enum OutputOrder { None, PriorityOrder, ReversePriorityOrder }; - } - /// MCAsmInfo - This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { @@ -47,7 +43,7 @@ namespace llvm { //===------------------------------------------------------------------===// // Properties to be set by the target writer, used to configure asm printer. // - + /// PointerSize - Pointer size in bytes. /// Default is 4. unsigned PointerSize; @@ -72,11 +68,6 @@ namespace llvm { /// the macho-specific .tbss directive for emitting thread local BSS Symbols bool HasMachoTBSSDirective; // Default is false. - /// StructorOutputOrder - Whether the static ctor/dtor list should be output - /// in no particular order, in order of increasing priority or the reverse: - /// in order of decreasing priority (the default). - Structors::OutputOrder StructorOutputOrder; // Default is reverse order. - /// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should /// emit a ".reference .constructors_used" or ".reference .destructors_used" /// directive after the a static ctor/dtor list. This directive is only @@ -152,6 +143,10 @@ namespace llvm { /// symbol names. This defaults to true. bool AllowPeriodsInName; + /// AllowUTF8 - This is true if the assembler accepts UTF-8 input. + // FIXME: Make this a more general encoding setting? + bool AllowUTF8; + //===--- Data Emission Directives -------------------------------------===// /// ZeroDirective - this should be set to the directive used to get some @@ -189,6 +184,11 @@ namespace llvm { const char *JT32Begin; // Defaults to "$a." bool SupportsDataRegions; + /// GPRel64Directive - if non-null, a directive that is used to emit a word + /// which should be relocated as a 64-bit GP-relative offset, e.g. .gpdword + /// on Mips. + const char *GPRel64Directive; // Defaults to NULL. + /// GPRel32Directive - if non-null, a directive that is used to emit a word /// which should be relocated as a 32-bit GP-relative offset, e.g. .gpword /// on Mips or .gprel32 on Alpha. @@ -323,13 +323,17 @@ namespace llvm { const char* DwarfSectionOffsetDirective; // Defaults to NULL /// DwarfRequiresRelocationForSectionOffset - True if we need to produce a - // relocation when we want a section offset in dwarf. + /// relocation when we want a section offset in dwarf. bool DwarfRequiresRelocationForSectionOffset; // Defaults to true; - // DwarfUsesLabelOffsetDifference - True if Dwarf2 output can - // use EmitLabelOffsetDifference. + /// DwarfUsesLabelOffsetDifference - True if Dwarf2 output can + /// use EmitLabelOffsetDifference. bool DwarfUsesLabelOffsetForRanges; + /// DwarfUsesRelocationsForStringPool - True if this Dwarf output must use + /// relocations to refer to entries in the string pool. + bool DwarfUsesRelocationsForStringPool; + /// DwarfRegNumForCFI - True if dwarf register numbers are printed /// instead of symbolic register names in .cfi_* directives. 
bool DwarfRegNumForCFI; // Defaults to false; @@ -381,6 +385,7 @@ namespace llvm { const char *getData64bitsDirective(unsigned AS = 0) const { return AS == 0 ? Data64bitsDirective : getDataASDirective(64, AS); } + const char *getGPRel64Directive() const { return GPRel64Directive; } const char *getGPRel32Directive() const { return GPRel32Directive; } /// [Code|Data]Begin label name accessors. @@ -424,9 +429,6 @@ namespace llvm { // bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; } bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; } - Structors::OutputOrder getStructorOutputOrder() const { - return StructorOutputOrder; - } bool hasStaticCtorDtorReferenceInStaticMode() const { return HasStaticCtorDtorReferenceInStaticMode; } @@ -487,6 +489,9 @@ namespace llvm { bool doesAllowPeriodsInName() const { return AllowPeriodsInName; } + bool doesAllowUTF8() const { + return AllowUTF8; + } const char *getZeroDirective() const { return ZeroDirective; } @@ -554,7 +559,7 @@ namespace llvm { ExceptionsType == ExceptionHandling::ARM || ExceptionsType == ExceptionHandling::Win64); } - bool doesDwarfUsesInlineInfoSection() const { + bool doesDwarfUseInlineInfoSection() const { return DwarfUsesInlineInfoSection; } const char *getDwarfSectionOffsetDirective() const { @@ -563,9 +568,12 @@ namespace llvm { bool doesDwarfRequireRelocationForSectionOffset() const { return DwarfRequiresRelocationForSectionOffset; } - bool doesDwarfUsesLabelOffsetForRanges() const { + bool doesDwarfUseLabelOffsetForRanges() const { return DwarfUsesLabelOffsetForRanges; } + bool doesDwarfUseRelocationsForStringPool() const { + return DwarfUsesRelocationsForStringPool; + } bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; } diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h b/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h index a3ee1593c3ac..0ff3e127ed0e 100644 --- a/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h +++ b/contrib/llvm/include/llvm/MC/MCAsmInfoCOFF.h @@ -14,9 +14,21 @@ namespace llvm { class MCAsmInfoCOFF : public MCAsmInfo { + virtual void anchor(); protected: explicit MCAsmInfoCOFF(); - + }; + + class MCAsmInfoMicrosoft : public MCAsmInfoCOFF { + virtual void anchor(); + protected: + explicit MCAsmInfoMicrosoft(); + }; + + class MCAsmInfoGNUCOFF : public MCAsmInfoCOFF { + virtual void anchor(); + protected: + explicit MCAsmInfoGNUCOFF(); }; } diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfoDarwin.h b/contrib/llvm/include/llvm/MC/MCAsmInfoDarwin.h index 1f6c49938c9c..af552de6e690 100644 --- a/contrib/llvm/include/llvm/MC/MCAsmInfoDarwin.h +++ b/contrib/llvm/include/llvm/MC/MCAsmInfoDarwin.h @@ -18,7 +18,9 @@ #include "llvm/MC/MCAsmInfo.h" namespace llvm { - struct MCAsmInfoDarwin : public MCAsmInfo { + class MCAsmInfoDarwin : public MCAsmInfo { + virtual void anchor(); + public: explicit MCAsmInfoDarwin(); }; } diff --git a/contrib/llvm/include/llvm/MC/MCAsmLayout.h b/contrib/llvm/include/llvm/MC/MCAsmLayout.h index a4585d1f1953..cf79216d076a 100644 --- a/contrib/llvm/include/llvm/MC/MCAsmLayout.h +++ b/contrib/llvm/include/llvm/MC/MCAsmLayout.h @@ -10,6 +10,7 @@ #ifndef LLVM_MC_MCASMLAYOUT_H #define LLVM_MC_MCASMLAYOUT_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" namespace llvm { diff --git a/contrib/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm/include/llvm/MC/MCAssembler.h index b8f8cc4cec90..d139173c3e13 100644 --- a/contrib/llvm/include/llvm/MC/MCAssembler.h +++ b/contrib/llvm/include/llvm/MC/MCAssembler.h @@ -25,7 +25,6 @@ 
namespace llvm { class raw_ostream; class MCAsmLayout; class MCAssembler; -class MCBinaryExpr; class MCContext; class MCCodeEmitter; class MCExpr; @@ -106,6 +105,7 @@ class MCFragment : public ilist_node { }; class MCDataFragment : public MCFragment { + virtual void anchor(); SmallString<32> Contents; /// Fixups - The list of fixups in this fragment. @@ -160,6 +160,8 @@ class MCDataFragment : public MCFragment { // object with just the MCInst and a code size, then we should just change // MCDataFragment to have an optional MCInst at its end. class MCInstFragment : public MCFragment { + virtual void anchor(); + /// Inst - The instruction this is a fragment for. MCInst Inst; @@ -215,6 +217,8 @@ class MCInstFragment : public MCFragment { }; class MCAlignFragment : public MCFragment { + virtual void anchor(); + /// Alignment - The alignment to ensure, in bytes. unsigned Alignment; @@ -263,6 +267,8 @@ class MCAlignFragment : public MCFragment { }; class MCFillFragment : public MCFragment { + virtual void anchor(); + /// Value - Value to use for filling bytes. int64_t Value; @@ -300,6 +306,8 @@ class MCFillFragment : public MCFragment { }; class MCOrgFragment : public MCFragment { + virtual void anchor(); + /// Offset - The offset this fragment should start at. const MCExpr *Offset; @@ -327,6 +335,8 @@ class MCOrgFragment : public MCFragment { }; class MCLEBFragment : public MCFragment { + virtual void anchor(); + /// Value - The value this fragment should contain. const MCExpr *Value; @@ -358,6 +368,8 @@ class MCLEBFragment : public MCFragment { }; class MCDwarfLineAddrFragment : public MCFragment { + virtual void anchor(); + /// LineDelta - the value of the difference between the two line numbers /// between two .loc dwarf directives. int64_t LineDelta; @@ -393,6 +405,8 @@ class MCDwarfLineAddrFragment : public MCFragment { }; class MCDwarfCallFrameFragment : public MCFragment { + virtual void anchor(); + /// AddrDelta - The expression for the difference of the two symbols that /// make up the address delta between two .cfi_* dwarf directives. const MCExpr *AddrDelta; @@ -711,43 +725,44 @@ class MCAssembler { /// \return Whether the fixup value was fully resolved. This is true if the /// \arg Value result is fixed, otherwise the value may change due to /// relocation. - bool EvaluateFixup(const MCAsmLayout &Layout, + bool evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value) const; /// Check whether a fixup can be satisfied, or whether it needs to be relaxed /// (increased in size, in order to hold its value correctly). - bool FixupNeedsRelaxation(const MCFixup &Fixup, const MCFragment *DF, + bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCInstFragment *DF, const MCAsmLayout &Layout) const; /// Check whether the given fragment needs relaxation. - bool FragmentNeedsRelaxation(const MCInstFragment *IF, + bool fragmentNeedsRelaxation(const MCInstFragment *IF, const MCAsmLayout &Layout) const; - /// LayoutOnce - Perform one layout iteration and return true if any offsets + /// layoutOnce - Perform one layout iteration and return true if any offsets /// were adjusted. 
- bool LayoutOnce(MCAsmLayout &Layout); + bool layoutOnce(MCAsmLayout &Layout); - bool LayoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD); + bool layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD); - bool RelaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF); + bool relaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF); - bool RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF); + bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF); - bool RelaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF); - bool RelaxDwarfCallFrameFragment(MCAsmLayout &Layout, + bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF); + bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout, MCDwarfCallFrameFragment &DF); - /// FinishLayout - Finalize a layout, including fragment lowering. - void FinishLayout(MCAsmLayout &Layout); + /// finishLayout - Finalize a layout, including fragment lowering. + void finishLayout(MCAsmLayout &Layout); - uint64_t HandleFixup(const MCAsmLayout &Layout, + uint64_t handleFixup(const MCAsmLayout &Layout, MCFragment &F, const MCFixup &Fixup); public: /// Compute the effective fragment size assuming it is laid out at the given /// \arg SectionAddress and \arg FragmentOffset. - uint64_t ComputeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const; + uint64_t computeFragmentSize(const MCAsmLayout &Layout, + const MCFragment &F) const; /// Find the symbol which defines the atom containing the given symbol, or /// null if there is no such symbol. @@ -760,7 +775,7 @@ class MCAssembler { bool isSymbolLinkerVisible(const MCSymbol &SD) const; /// Emit the section contents using the given object writer. - void WriteSectionData(const MCSectionData *Section, + void writeSectionData(const MCSectionData *Section, const MCAsmLayout &Layout) const; /// Check whether a given symbol has been flagged with .thumb_func. diff --git a/contrib/llvm/include/llvm/MC/MCCodeEmitter.h b/contrib/llvm/include/llvm/MC/MCCodeEmitter.h index bc63241bece9..934ef69ce3fe 100644 --- a/contrib/llvm/include/llvm/MC/MCCodeEmitter.h +++ b/contrib/llvm/include/llvm/MC/MCCodeEmitter.h @@ -10,12 +10,8 @@ #ifndef LLVM_MC_MCCODEEMITTER_H #define LLVM_MC_MCCODEEMITTER_H -#include "llvm/MC/MCFixup.h" - -#include - namespace llvm { -class MCExpr; +class MCFixup; class MCInst; class raw_ostream; template class SmallVectorImpl; diff --git a/contrib/llvm/include/llvm/MC/MCCodeGenInfo.h b/contrib/llvm/include/llvm/MC/MCCodeGenInfo.h index 1c54c47e2d95..d1765e1240a4 100644 --- a/contrib/llvm/include/llvm/MC/MCCodeGenInfo.h +++ b/contrib/llvm/include/llvm/MC/MCCodeGenInfo.h @@ -20,7 +20,7 @@ namespace llvm { class MCCodeGenInfo { - /// RelocationModel - Relocation model: statcic, pic, etc. + /// RelocationModel - Relocation model: static, pic, etc. /// Reloc::Model RelocationModel; @@ -28,13 +28,20 @@ namespace llvm { /// CodeModel::Model CMModel; + /// OptLevel - Optimization level. 
+ /// + CodeGenOpt::Level OptLevel; + public: void InitMCCodeGenInfo(Reloc::Model RM = Reloc::Default, - CodeModel::Model CM = CodeModel::Default); + CodeModel::Model CM = CodeModel::Default, + CodeGenOpt::Level OL = CodeGenOpt::Default); Reloc::Model getRelocationModel() const { return RelocationModel; } CodeModel::Model getCodeModel() const { return CMModel; } + + CodeGenOpt::Level getOptLevel() const { return OptLevel; } }; } // namespace llvm diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h index a49a35c8d5ba..b58631919330 100644 --- a/contrib/llvm/include/llvm/MC/MCContext.h +++ b/contrib/llvm/include/llvm/MC/MCContext.h @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" #include // FIXME: Shouldn't be needed. @@ -29,6 +30,7 @@ namespace llvm { class MCObjectFileInfo; class MCRegisterInfo; class MCLineSection; + class SMLoc; class StringRef; class Twine; class MCSectionMachO; @@ -43,6 +45,8 @@ namespace llvm { public: typedef StringMap SymbolTable; private: + /// The SourceMgr for this object, if any. + const SourceMgr *SrcMgr; /// The MCAsmInfo for this target. const MCAsmInfo &MAI; @@ -98,6 +102,27 @@ namespace llvm { MCDwarfLoc CurrentDwarfLoc; bool DwarfLocSeen; + /// Generate dwarf debugging info for assembly source files. + bool GenDwarfForAssembly; + + /// The current dwarf file number when generate dwarf debugging info for + /// assembly source files. + unsigned GenDwarfFileNumber; + + /// The default initial text section that we generate dwarf debugging line + /// info for when generating dwarf assembly source files. + const MCSection *GenDwarfSection; + /// Symbols created for the start and end of this section. + MCSymbol *GenDwarfSectionStartSym, *GenDwarfSectionEndSym; + + /// The information gathered from labels that will have dwarf label + /// entries when generating dwarf assembly source files. + std::vector MCGenDwarfLabelEntries; + + /// The string to embed in the debug information for the compile unit, if + /// non-empty. + StringRef DwarfDebugFlags; + /// Honor temporary labels, this is useful for debugging semantic /// differences between temporary and non-temporary labels (primarily on /// Darwin). @@ -116,9 +141,11 @@ namespace llvm { public: explicit MCContext(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCObjectFileInfo *MOFI); + const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0); ~MCContext(); + const SourceMgr *getSourceManager() const { return SrcMgr; } + const MCAsmInfo &getAsmInfo() const { return MAI; } const MCRegisterInfo &getRegisterInfo() const { return MRI; } @@ -204,7 +231,8 @@ namespace llvm { /// @{ /// GetDwarfFile - creates an entry in the dwarf file and directory tables. 
- unsigned GetDwarfFile(StringRef FileName, unsigned FileNumber); + unsigned GetDwarfFile(StringRef Directory, StringRef FileName, + unsigned FileNumber); bool isValidDwarfFileNumber(unsigned FileNumber); @@ -251,6 +279,31 @@ namespace llvm { bool getDwarfLocSeen() { return DwarfLocSeen; } const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; } + bool getGenDwarfForAssembly() { return GenDwarfForAssembly; } + void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; } + unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; } + unsigned nextGenDwarfFileNumber() { return ++GenDwarfFileNumber; } + const MCSection *getGenDwarfSection() { return GenDwarfSection; } + void setGenDwarfSection(const MCSection *Sec) { GenDwarfSection = Sec; } + MCSymbol *getGenDwarfSectionStartSym() { return GenDwarfSectionStartSym; } + void setGenDwarfSectionStartSym(MCSymbol *Sym) { + GenDwarfSectionStartSym = Sym; + } + MCSymbol *getGenDwarfSectionEndSym() { return GenDwarfSectionEndSym; } + void setGenDwarfSectionEndSym(MCSymbol *Sym) { + GenDwarfSectionEndSym = Sym; + } + const std::vector + &getMCGenDwarfLabelEntries() const { + return MCGenDwarfLabelEntries; + } + void addMCGenDwarfLabelEntry(const MCGenDwarfLabelEntry *E) { + MCGenDwarfLabelEntries.push_back(E); + } + + void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; } + StringRef getDwarfDebugFlags() { return DwarfDebugFlags; } + /// @} char *getSecureLogFile() { return SecureLogFile; } @@ -268,6 +321,11 @@ namespace llvm { } void Deallocate(void *Ptr) { } + + // Unrecoverable error has occured. Display the best diagnostic we can + // and bail via exit(1). For now, most MC backend errors are unrecoverable. + // FIXME: We should really do something about that. + LLVM_ATTRIBUTE_NORETURN void FatalError(SMLoc L, const Twine &Msg); }; } // end namespace llvm diff --git a/contrib/llvm/include/llvm/MC/MCDisassembler.h b/contrib/llvm/include/llvm/MC/MCDisassembler.h index 454277d6852c..4b5fbec47dce 100644 --- a/contrib/llvm/include/llvm/MC/MCDisassembler.h +++ b/contrib/llvm/include/llvm/MC/MCDisassembler.h @@ -90,7 +90,7 @@ class MCDisassembler { /// @return - An array of instruction information, with one entry for /// each MCInst opcode this disassembler returns. /// NULL if there is no info for this target. - virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } + virtual const EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } private: // diff --git a/contrib/llvm/include/llvm/MC/MCDwarf.h b/contrib/llvm/include/llvm/MC/MCDwarf.h index 431e3c4da86a..fdb7ab23c09f 100644 --- a/contrib/llvm/include/llvm/MC/MCDwarf.h +++ b/contrib/llvm/include/llvm/MC/MCDwarf.h @@ -17,20 +17,18 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Dwarf.h" #include namespace llvm { class MCContext; - class MCExpr; + class MCObjectWriter; class MCSection; - class MCSectionData; class MCStreamer; class MCSymbol; - class MCObjectStreamer; - class raw_ostream; + class SourceMgr; + class SMLoc; /// MCDwarfFile - Instances of this class represent the name of the dwarf /// .file directive and its associated dwarf file number in the MC file, @@ -210,7 +208,7 @@ namespace llvm { // // This emits the Dwarf file and the line tables. 
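
GetDwarfFile now takes the directory and the file name as separate strings, which matches DWARF's split between the include_directories and file_names tables in the line-table header. A minimal standalone helper, purely illustrative and not LLVM code, showing the kind of split a caller might perform when it only holds a combined path:

    #include <string>
    #include <utility>

    // "lib/foo.c" -> ("lib", "foo.c"); "foo.c" -> ("", "foo.c").
    static std::pair<std::string, std::string>
    splitDirAndFile(const std::string &Path) {
      std::string::size_type Slash = Path.rfind('/');
      if (Slash == std::string::npos)
        return std::make_pair(std::string(), Path);
      return std::make_pair(Path.substr(0, Slash), Path.substr(Slash + 1));
    }
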
// - static void Emit(MCStreamer *MCOS); + static const MCSymbol *Emit(MCStreamer *MCOS); }; class MCDwarfLineAddr { @@ -227,23 +225,63 @@ namespace llvm { int64_t LineDelta, uint64_t AddrDelta); }; + class MCGenDwarfInfo { + public: + // + // When generating dwarf for assembly source files this emits the Dwarf + // sections. + // + static void Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol); + }; + + // When generating dwarf for assembly source files this is the info that is + // needed to be gathered for each symbol that will have a dwarf label. + class MCGenDwarfLabelEntry { + private: + // Name of the symbol without a leading underbar, if any. + StringRef Name; + // The dwarf file number this symbol is in. + unsigned FileNumber; + // The line number this symbol is at. + unsigned LineNumber; + // The low_pc for the dwarf label is taken from this symbol. + MCSymbol *Label; + + public: + MCGenDwarfLabelEntry(StringRef name, unsigned fileNumber, + unsigned lineNumber, MCSymbol *label) : + Name(name), FileNumber(fileNumber), LineNumber(lineNumber), Label(label){} + + StringRef getName() const { return Name; } + unsigned getFileNumber() const { return FileNumber; } + unsigned getLineNumber() const { return LineNumber; } + MCSymbol *getLabel() const { return Label; } + + // This is called when label is created when we are generating dwarf for + // assembly source files. + static void Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr, + SMLoc &Loc); + }; + class MCCFIInstruction { public: - enum OpType { SameValue, Remember, Restore, Move, RelMove }; + enum OpType { SameValue, RememberState, RestoreState, Move, RelMove, Escape, + Restore}; private: OpType Operation; MCSymbol *Label; // Move to & from location. MachineLocation Destination; MachineLocation Source; + std::vector Values; public: MCCFIInstruction(OpType Op, MCSymbol *L) : Operation(Op), Label(L) { - assert(Op == Remember || Op == Restore); + assert(Op == RememberState || Op == RestoreState); } MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register) : Operation(Op), Label(L), Destination(Register) { - assert(Op == SameValue); + assert(Op == SameValue || Op == Restore); } MCCFIInstruction(MCSymbol *L, const MachineLocation &D, const MachineLocation &S) @@ -254,16 +292,24 @@ namespace llvm { : Operation(Op), Label(L), Destination(D), Source(S) { assert(Op == RelMove); } + MCCFIInstruction(OpType Op, MCSymbol *L, StringRef Vals) + : Operation(Op), Label(L), Values(Vals.begin(), Vals.end()) { + assert(Op == Escape); + } OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } const MachineLocation &getDestination() const { return Destination; } const MachineLocation &getSource() const { return Source; } + const StringRef getValues() const { + return StringRef(&Values[0], Values.size()); + } }; struct MCDwarfFrameInfo { MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0), Function(0), Instructions(), PersonalityEncoding(), - LsdaEncoding(0), CompactUnwindEncoding(0) {} + LsdaEncoding(0), CompactUnwindEncoding(0), + IsSignalFrame(false) {} MCSymbol *Begin; MCSymbol *End; const MCSymbol *Personality; @@ -273,6 +319,7 @@ namespace llvm { unsigned PersonalityEncoding; unsigned LsdaEncoding; uint32_t CompactUnwindEncoding; + bool IsSignalFrame; }; class MCDwarfFrameEmitter { diff --git a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h index 3c150dca9e62..f153cb0c1af0 100644 --- 
a/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h +++ b/contrib/llvm/include/llvm/MC/MCELFObjectWriter.h @@ -10,28 +10,91 @@ #ifndef LLVM_MC_MCELFOBJECTWRITER_H #define LLVM_MC_MCELFOBJECTWRITER_H -#include "llvm/MC/MCObjectWriter.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ELF.h" +#include namespace llvm { +class MCAssembler; +class MCFixup; +class MCFragment; +class MCObjectWriter; +class MCSymbol; +class MCValue; + +/// @name Relocation Data +/// @{ + +struct ELFRelocationEntry { + // Make these big enough for both 32-bit and 64-bit + uint64_t r_offset; + int Index; + unsigned Type; + const MCSymbol *Symbol; + uint64_t r_addend; + const MCFixup *Fixup; + + ELFRelocationEntry() + : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0), Fixup(0) {} + + ELFRelocationEntry(uint64_t RelocOffset, int Idx, unsigned RelType, + const MCSymbol *Sym, uint64_t Addend, const MCFixup &Fixup) + : r_offset(RelocOffset), Index(Idx), Type(RelType), Symbol(Sym), + r_addend(Addend), Fixup(&Fixup) {} + + // Support lexicographic sorting. + bool operator<(const ELFRelocationEntry &RE) const { + return RE.r_offset < r_offset; + } +}; + class MCELFObjectTargetWriter { - const Triple::OSType OSType; + const uint8_t OSABI; const uint16_t EMachine; const unsigned HasRelocationAddend : 1; const unsigned Is64Bit : 1; + protected: - MCELFObjectTargetWriter(bool Is64Bit_, Triple::OSType OSType_, + + MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_, uint16_t EMachine_, bool HasRelocationAddend_); public: - virtual ~MCELFObjectTargetWriter(); + static uint8_t getOSABI(Triple::OSType OSType) { + switch (OSType) { + case Triple::FreeBSD: + return ELF::ELFOSABI_FREEBSD; + case Triple::Linux: + return ELF::ELFOSABI_LINUX; + default: + return ELF::ELFOSABI_NONE; + } + } + + virtual ~MCELFObjectTargetWriter() {} + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const = 0; + virtual unsigned getEFlags() const; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; + virtual void adjustFixupOffset(const MCFixup &Fixup, + uint64_t &RelocOffset); + + virtual void sortRelocs(const MCAssembler &Asm, + std::vector &Relocs); /// @name Accessors /// @{ - Triple::OSType getOSType() { return OSType; } + uint8_t getOSABI() { return OSABI; } uint16_t getEMachine() { return EMachine; } bool hasRelocationAddend() { return HasRelocationAddend; } - bool is64Bit() { return Is64Bit; } + bool is64Bit() const { return Is64Bit; } /// @} }; diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h index 0f285999ad62..ff33641dba7f 100644 --- a/contrib/llvm/include/llvm/MC/MCExpr.h +++ b/contrib/llvm/include/llvm/MC/MCExpr.h @@ -15,7 +15,6 @@ #include "llvm/Support/DataTypes.h" namespace llvm { -class MCAsmInfo; class MCAsmLayout; class MCAssembler; class MCContext; @@ -162,6 +161,7 @@ class MCSymbolRefExpr : public MCExpr { VK_TPOFF, VK_DTPOFF, VK_TLVP, // Mach-O thread local variable relocation + VK_SECREL, // FIXME: We'd really like to use the generic Kinds listed above for these. VK_ARM_PLT, // ARM-style PLT references. 
i.e., (PLT) instead of @PLT VK_ARM_TLSGD, // ditto for TLSGD, GOT, GOTOFF, TPOFF and GOTTPOFF @@ -169,12 +169,32 @@ class MCSymbolRefExpr : public MCExpr { VK_ARM_GOTOFF, VK_ARM_TPOFF, VK_ARM_GOTTPOFF, + VK_ARM_TARGET1, VK_PPC_TOC, VK_PPC_DARWIN_HA16, // ha16(symbol) VK_PPC_DARWIN_LO16, // lo16(symbol) VK_PPC_GAS_HA16, // symbol@ha - VK_PPC_GAS_LO16 // symbol@l + VK_PPC_GAS_LO16, // symbol@l + + VK_Mips_GPREL, + VK_Mips_GOT_CALL, + VK_Mips_GOT16, + VK_Mips_GOT, + VK_Mips_ABS_HI, + VK_Mips_ABS_LO, + VK_Mips_TLSGD, + VK_Mips_TLSLDM, + VK_Mips_DTPREL_HI, + VK_Mips_DTPREL_LO, + VK_Mips_GOTTPREL, + VK_Mips_TPREL_HI, + VK_Mips_TPREL_LO, + VK_Mips_GPOFF_HI, + VK_Mips_GPOFF_LO, + VK_Mips_GOT_DISP, + VK_Mips_GOT_PAGE, + VK_Mips_GOT_OFST }; private: @@ -185,7 +205,9 @@ class MCSymbolRefExpr : public MCExpr { const VariantKind Kind; explicit MCSymbolRefExpr(const MCSymbol *_Symbol, VariantKind _Kind) - : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol), Kind(_Kind) {} + : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol), Kind(_Kind) { + assert(Symbol); + } public: /// @name Construction diff --git a/contrib/llvm/include/llvm/MC/MCFixup.h b/contrib/llvm/include/llvm/MC/MCFixup.h index 6fde797e40fd..16e9eb730b4e 100644 --- a/contrib/llvm/include/llvm/MC/MCFixup.h +++ b/contrib/llvm/include/llvm/MC/MCFixup.h @@ -11,6 +11,8 @@ #define LLVM_MC_MCFIXUP_H #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" #include namespace llvm { @@ -26,6 +28,14 @@ enum MCFixupKind { FK_PCRel_2, ///< A two-byte pc relative fixup. FK_PCRel_4, ///< A four-byte pc relative fixup. FK_PCRel_8, ///< A eight-byte pc relative fixup. + FK_GPRel_1, ///< A one-byte gp relative fixup. + FK_GPRel_2, ///< A two-byte gp relative fixup. + FK_GPRel_4, ///< A four-byte gp relative fixup. + FK_GPRel_8, ///< A eight-byte gp relative fixup. + FK_SecRel_1, ///< A one-byte section relative fixup. + FK_SecRel_2, ///< A two-byte section relative fixup. + FK_SecRel_4, ///< A four-byte section relative fixup. + FK_SecRel_8, ///< A eight-byte section relative fixup. FirstTargetFixupKind = 128, @@ -61,14 +71,17 @@ class MCFixup { /// determine how the operand value should be encoded into the instruction. unsigned Kind; + /// The source location which gave rise to the fixup, if any. + SMLoc Loc; public: static MCFixup Create(uint32_t Offset, const MCExpr *Value, - MCFixupKind Kind) { + MCFixupKind Kind, SMLoc Loc = SMLoc()) { assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!"); MCFixup FI; FI.Value = Value; FI.Offset = Offset; FI.Kind = unsigned(Kind); + FI.Loc = Loc; return FI; } @@ -83,13 +96,15 @@ class MCFixup { /// size. It is an error to pass an unsupported size. static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) { switch (Size) { - default: assert(0 && "Invalid generic fixup size!"); + default: llvm_unreachable("Invalid generic fixup size!"); case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1; case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2; case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4; case 8: return isPCRel ? 
FK_PCRel_8 : FK_Data_8; } } + + SMLoc getLoc() const { return Loc; } }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/MC/MCInst.h b/contrib/llvm/include/llvm/MC/MCInst.h index d38476477495..397a37d3ce48 100644 --- a/contrib/llvm/include/llvm/MC/MCInst.h +++ b/contrib/llvm/include/llvm/MC/MCInst.h @@ -19,12 +19,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/SMLoc.h" namespace llvm { class raw_ostream; class MCAsmInfo; class MCInstPrinter; class MCExpr; +class MCInst; /// MCOperand - Instances of this class represent operands of the MCInst class. /// This is a simple discriminated union. @@ -34,7 +36,8 @@ class MCOperand { kRegister, ///< Register operand. kImmediate, ///< Immediate operand. kFPImmediate, ///< Floating-point immediate operand. - kExpr ///< Relocatable immediate operand. + kExpr, ///< Relocatable immediate operand. + kInst ///< Sub-instruction operand. }; unsigned char Kind; @@ -43,6 +46,7 @@ class MCOperand { int64_t ImmVal; double FPImmVal; const MCExpr *ExprVal; + const MCInst *InstVal; }; public: @@ -53,6 +57,7 @@ class MCOperand { bool isImm() const { return Kind == kImmediate; } bool isFPImm() const { return Kind == kFPImmediate; } bool isExpr() const { return Kind == kExpr; } + bool isInst() const { return Kind == kInst; } /// getReg - Returns the register number. unsigned getReg() const { @@ -94,6 +99,15 @@ class MCOperand { ExprVal = Val; } + const MCInst *getInst() const { + assert(isInst() && "This is not a sub-instruction"); + return InstVal; + } + void setInst(const MCInst *Val) { + assert(isInst() && "This is not a sub-instruction"); + InstVal = Val; + } + static MCOperand CreateReg(unsigned Reg) { MCOperand Op; Op.Kind = kRegister; @@ -118,24 +132,34 @@ class MCOperand { Op.ExprVal = Val; return Op; } + static MCOperand CreateInst(const MCInst *Val) { + MCOperand Op; + Op.Kind = kInst; + Op.InstVal = Val; + return Op; + } void print(raw_ostream &OS, const MCAsmInfo *MAI) const; void dump() const; }; +template <> struct isPodLike { static const bool value = true; }; /// MCInst - Instances of this class represent a single low-level machine /// instruction. class MCInst { unsigned Opcode; + SMLoc Loc; SmallVector Operands; public: MCInst() : Opcode(0) {} void setOpcode(unsigned Op) { Opcode = Op; } - unsigned getOpcode() const { return Opcode; } + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } MCOperand &getOperand(unsigned i) { return Operands[i]; } unsigned getNumOperands() const { return Operands.size(); } diff --git a/contrib/llvm/include/llvm/MC/MCInstPrinter.h b/contrib/llvm/include/llvm/MC/MCInstPrinter.h index 01ad2d3f8088..3c4f28be7ca6 100644 --- a/contrib/llvm/include/llvm/MC/MCInstPrinter.h +++ b/contrib/llvm/include/llvm/MC/MCInstPrinter.h @@ -14,6 +14,8 @@ namespace llvm { class MCInst; class raw_ostream; class MCAsmInfo; +class MCInstrInfo; +class MCRegisterInfo; class StringRef; /// MCInstPrinter - This is an instance of a target assembly language printer @@ -25,6 +27,8 @@ class MCInstPrinter { /// assembly emission is disable. raw_ostream *CommentStream; const MCAsmInfo &MAI; + const MCInstrInfo &MII; + const MCRegisterInfo &MRI; /// The current set of available features. unsigned AvailableFeatures; @@ -32,8 +36,9 @@ class MCInstPrinter { /// Utility function for printing annotations. 
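
The new kInst operand kind above turns MCOperand into a tagged union that can also carry a pointer to a nested instruction, alongside the SMLoc now threaded through MCInst and MCFixup. A stripped-down standalone sketch of that union pattern, using local toy types rather than the LLVM classes:

    #include <cassert>

    struct ToyInst; // stands in for MCInst

    class ToyOperand {
      enum KindTy { kInvalid, kRegister, kImmediate, kInst };
      unsigned char Kind;
      union {
        unsigned RegVal;
        long long ImmVal;
        const ToyInst *InstVal; // sub-instruction operand, like kInst above
      };
    public:
      ToyOperand() : Kind(kInvalid) {}

      bool isInst() const { return Kind == kInst; }
      const ToyInst *getInst() const {
        assert(isInst() && "This is not a sub-instruction");
        return InstVal;
      }
      static ToyOperand CreateInst(const ToyInst *Val) {
        ToyOperand Op;
        Op.Kind = kInst;
        Op.InstVal = Val;
        return Op;
      }
    };
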
void printAnnotation(raw_ostream &OS, StringRef Annot); public: - MCInstPrinter(const MCAsmInfo &mai) - : CommentStream(0), MAI(mai), AvailableFeatures(0) {} + MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii, + const MCRegisterInfo &mri) + : CommentStream(0), MAI(mai), MII(mii), MRI(mri), AvailableFeatures(0) {} virtual ~MCInstPrinter(); @@ -47,7 +52,7 @@ class MCInstPrinter { /// getOpcodeName - Return the name of the specified opcode enum (e.g. /// "MOV32ri") or empty if we can't resolve it. - virtual StringRef getOpcodeName(unsigned Opcode) const; + StringRef getOpcodeName(unsigned Opcode) const; /// printRegName - Print the assembler register name. virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; diff --git a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h index 8f3c499b1c73..acad6336aca7 100644 --- a/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/contrib/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -33,7 +33,7 @@ class MCInstrAnalysis { } virtual bool isConditionalBranch(const MCInst &Inst) const { - return Info->get(Inst.getOpcode()).isBranch(); + return Info->get(Inst.getOpcode()).isConditionalBranch(); } virtual bool isUnconditionalBranch(const MCInst &Inst) const { diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h index aafa800c1ac8..186612d904d8 100644 --- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h +++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h @@ -58,17 +58,17 @@ class MCOperandInfo { /// if the operand is a register. If isLookupPtrRegClass is set, then this is /// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to /// get a dynamic register class. - short RegClass; + int16_t RegClass; /// Flags - These are flags from the MCOI::OperandFlags enum. - unsigned short Flags; + uint8_t Flags; + + /// OperandType - Information about the type of the operand. + uint8_t OperandType; /// Lower 16 bits are used to specify which constraints are set. The higher 16 /// bits are used to specify the value of constraints (4 bits each). - unsigned Constraints; - - /// OperandType - Information about the type of the operand. - MCOI::OperandType OperandType; + uint32_t Constraints; /// Currently no other information. /// isLookupPtrRegClass - Set if this operand is a pointer value and it @@ -137,11 +137,10 @@ class MCInstrDesc { unsigned short NumDefs; // Num of args that are definitions unsigned short SchedClass; // enum identifying instr sched class unsigned short Size; // Number of bytes in encoding. - const char * Name; // Name of the instruction record in td file unsigned Flags; // Flags identifying machine instr class uint64_t TSFlags; // Target Specific Flag values - const unsigned *ImplicitUses; // Registers implicitly read by this instr - const unsigned *ImplicitDefs; // Registers implicitly defined by this instr + const uint16_t *ImplicitUses; // Registers implicitly read by this instr + const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands /// getOperandConstraint - Returns the value of the specific constraint if @@ -161,12 +160,6 @@ class MCInstrDesc { return Opcode; } - /// getName - Return the name of the record in the .td file for this - /// instruction, for example "ADD8ri". - const char *getName() const { - return Name; - } - /// getNumOperands - Return the number of declared MachineOperands for this /// MachineInstruction. 
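
MCOperandInfo above now packs its per-operand data into fixed-width integers; per the pre-existing comment, the 32-bit Constraints word keeps a "this constraint is set" bit in the low 16 bits and a 4-bit value per constraint in the high 16 bits. A small sketch of encoding and decoding a word with that layout (illustrative helpers, not the LLVM accessors):

    #include <cstdint>

    // Bit C in the low half marks constraint C as set; its 4-bit value
    // lives at bit 16 + 4*C.
    static uint32_t encodeConstraint(uint32_t Word, unsigned C, unsigned Val) {
      Word |= 1u << C;
      Word |= (Val & 0xFu) << (16 + C * 4);
      return Word;
    }

    static int decodeConstraint(uint32_t Word, unsigned C) {
      if (!(Word & (1u << C)))
        return -1; // constraint not present
      return (Word >> (16 + C * 4)) & 0xF;
    }

    // decodeConstraint(encodeConstraint(0, 1, 7), 1) == 7
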
Note that variadic (isVariadic() returns true) /// instructions may have additional operands at the end of the list, and note @@ -184,6 +177,10 @@ class MCInstrDesc { return NumDefs; } + /// getFlags - Return flags of this instruction. + /// + unsigned getFlags() const { return Flags; } + /// isVariadic - Return true if this instruction can have a variable number of /// operands. In this case, the variable operands will be after the normal /// operands but before the implicit definitions and uses (if any are @@ -198,84 +195,6 @@ class MCInstrDesc { return Flags & (1 << MCID::HasOptionalDef); } - /// getImplicitUses - Return a list of registers that are potentially - /// read by any instance of this machine instruction. For example, on X86, - /// the "adc" instruction adds two register operands and adds the carry bit in - /// from the flags register. In this case, the instruction is marked as - /// implicitly reading the flags. Likewise, the variable shift instruction on - /// X86 is marked as implicitly reading the 'CL' register, which it always - /// does. - /// - /// This method returns null if the instruction has no implicit uses. - const unsigned *getImplicitUses() const { - return ImplicitUses; - } - - /// getNumImplicitUses - Return the number of implicit uses this instruction - /// has. - unsigned getNumImplicitUses() const { - if (ImplicitUses == 0) return 0; - unsigned i = 0; - for (; ImplicitUses[i]; ++i) /*empty*/; - return i; - } - - /// getImplicitDefs - Return a list of registers that are potentially - /// written by any instance of this machine instruction. For example, on X86, - /// many instructions implicitly set the flags register. In this case, they - /// are marked as setting the FLAGS. Likewise, many instructions always - /// deposit their result in a physical register. For example, the X86 divide - /// instruction always deposits the quotient and remainder in the EAX/EDX - /// registers. For that instruction, this will return a list containing the - /// EAX/EDX/EFLAGS registers. - /// - /// This method returns null if the instruction has no implicit defs. - const unsigned *getImplicitDefs() const { - return ImplicitDefs; - } - - /// getNumImplicitDefs - Return the number of implicit defs this instruction - /// has. - unsigned getNumImplicitDefs() const { - if (ImplicitDefs == 0) return 0; - unsigned i = 0; - for (; ImplicitDefs[i]; ++i) /*empty*/; - return i; - } - - /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly - /// uses the specified physical register. - bool hasImplicitUseOfPhysReg(unsigned Reg) const { - if (const unsigned *ImpUses = ImplicitUses) - for (; *ImpUses; ++ImpUses) - if (*ImpUses == Reg) return true; - return false; - } - - /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly - /// defines the specified physical register. - bool hasImplicitDefOfPhysReg(unsigned Reg) const { - if (const unsigned *ImpDefs = ImplicitDefs) - for (; *ImpDefs; ++ImpDefs) - if (*ImpDefs == Reg) return true; - return false; - } - - /// getSchedClass - Return the scheduling class for this instruction. The - /// scheduling class is an index into the InstrItineraryData table. This - /// returns zero if there is no known scheduling information for the - /// instruction. - /// - unsigned getSchedClass() const { - return SchedClass; - } - - /// getSize - Return the number of bytes in the encoding of this instruction, - /// or zero if the encoding size cannot be known from the opcode. 
- unsigned getSize() const { - return Size; - } - /// isPseudo - Return true if this is a pseudo instruction that doesn't /// correspond to a real machine instruction. /// @@ -298,18 +217,6 @@ class MCInstrDesc { return Flags & (1 << MCID::Barrier); } - /// findFirstPredOperandIdx() - Find the index of the first operand in the - /// operand list that is used to represent the predicate. It returns -1 if - /// none is found. - int findFirstPredOperandIdx() const { - if (isPredicable()) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (OpInfo[i].isPredicate()) - return i; - } - return -1; - } - /// isTerminator - Returns true if this instruction part of the terminator for /// a basic block. Typically this is things like return and branch /// instructions. @@ -530,6 +437,97 @@ class MCInstrDesc { bool hasExtraDefRegAllocReq() const { return Flags & (1 << MCID::ExtraDefRegAllocReq); } + + + /// getImplicitUses - Return a list of registers that are potentially + /// read by any instance of this machine instruction. For example, on X86, + /// the "adc" instruction adds two register operands and adds the carry bit in + /// from the flags register. In this case, the instruction is marked as + /// implicitly reading the flags. Likewise, the variable shift instruction on + /// X86 is marked as implicitly reading the 'CL' register, which it always + /// does. + /// + /// This method returns null if the instruction has no implicit uses. + const uint16_t *getImplicitUses() const { + return ImplicitUses; + } + + /// getNumImplicitUses - Return the number of implicit uses this instruction + /// has. + unsigned getNumImplicitUses() const { + if (ImplicitUses == 0) return 0; + unsigned i = 0; + for (; ImplicitUses[i]; ++i) /*empty*/; + return i; + } + + /// getImplicitDefs - Return a list of registers that are potentially + /// written by any instance of this machine instruction. For example, on X86, + /// many instructions implicitly set the flags register. In this case, they + /// are marked as setting the FLAGS. Likewise, many instructions always + /// deposit their result in a physical register. For example, the X86 divide + /// instruction always deposits the quotient and remainder in the EAX/EDX + /// registers. For that instruction, this will return a list containing the + /// EAX/EDX/EFLAGS registers. + /// + /// This method returns null if the instruction has no implicit defs. + const uint16_t *getImplicitDefs() const { + return ImplicitDefs; + } + + /// getNumImplicitDefs - Return the number of implicit defs this instruction + /// has. + unsigned getNumImplicitDefs() const { + if (ImplicitDefs == 0) return 0; + unsigned i = 0; + for (; ImplicitDefs[i]; ++i) /*empty*/; + return i; + } + + /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly + /// uses the specified physical register. + bool hasImplicitUseOfPhysReg(unsigned Reg) const { + if (const uint16_t *ImpUses = ImplicitUses) + for (; *ImpUses; ++ImpUses) + if (*ImpUses == Reg) return true; + return false; + } + + /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly + /// defines the specified physical register. + bool hasImplicitDefOfPhysReg(unsigned Reg) const { + if (const uint16_t *ImpDefs = ImplicitDefs) + for (; *ImpDefs; ++ImpDefs) + if (*ImpDefs == Reg) return true; + return false; + } + + /// getSchedClass - Return the scheduling class for this instruction. The + /// scheduling class is an index into the InstrItineraryData table. 
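
The implicit use/def lists reinstated above are plain zero-terminated uint16_t arrays, so both the count and the membership test are linear scans to the trailing 0. A standalone sketch of that convention with made-up register numbers:

    #include <cstdint>

    // Register 0 is never valid, so it doubles as the list terminator.
    static const uint16_t ImplicitUses[] = { 17, 42, 0 };

    static unsigned countRegs(const uint16_t *List) {
      if (List == 0) return 0;
      unsigned N = 0;
      while (List[N]) ++N;
      return N;
    }

    static bool listHasReg(const uint16_t *List, unsigned Reg) {
      if (List)
        for (; *List; ++List)
          if (*List == Reg) return true;
      return false;
    }

    // countRegs(ImplicitUses) == 2; listHasReg(ImplicitUses, 42) is true.
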
This + /// returns zero if there is no known scheduling information for the + /// instruction. + /// + unsigned getSchedClass() const { + return SchedClass; + } + + /// getSize - Return the number of bytes in the encoding of this instruction, + /// or zero if the encoding size cannot be known from the opcode. + unsigned getSize() const { + return Size; + } + + /// findFirstPredOperandIdx() - Find the index of the first operand in the + /// operand list that is used to represent the predicate. It returns -1 if + /// none is found. + int findFirstPredOperandIdx() const { + if (isPredicable()) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (OpInfo[i].isPredicate()) + return i; + } + return -1; + } }; } // end namespace llvm diff --git a/contrib/llvm/include/llvm/MC/MCInstrInfo.h b/contrib/llvm/include/llvm/MC/MCInstrInfo.h index a63e5faf8f93..1d3a36ca7c73 100644 --- a/contrib/llvm/include/llvm/MC/MCInstrInfo.h +++ b/contrib/llvm/include/llvm/MC/MCInstrInfo.h @@ -24,14 +24,19 @@ namespace llvm { /// MCInstrInfo - Interface to description of machine instruction set /// class MCInstrInfo { - const MCInstrDesc *Desc; // Raw array to allow static init'n - unsigned NumOpcodes; // Number of entries in the desc array + const MCInstrDesc *Desc; // Raw array to allow static init'n + const unsigned *InstrNameIndices; // Array for name indices in InstrNameData + const char *InstrNameData; // Instruction name string pool + unsigned NumOpcodes; // Number of entries in the desc array public: /// InitMCInstrInfo - Initialize MCInstrInfo, called by TableGen /// auto-generated routines. *DO NOT USE*. - void InitMCInstrInfo(const MCInstrDesc *D, unsigned NO) { + void InitMCInstrInfo(const MCInstrDesc *D, const unsigned *NI, const char *ND, + unsigned NO) { Desc = D; + InstrNameIndices = NI; + InstrNameData = ND; NumOpcodes = NO; } @@ -44,6 +49,12 @@ class MCInstrInfo { assert(Opcode < NumOpcodes && "Invalid opcode!"); return Desc[Opcode]; } + + /// getName - Returns the name for the instructions with the given opcode. + const char *getName(unsigned Opcode) const { + assert(Opcode < NumOpcodes && "Invalid opcode!"); + return &InstrNameData[InstrNameIndices[Opcode]]; + } }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h index 060d5085d0c3..aea4b410fea2 100644 --- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -14,15 +14,14 @@ #ifndef LLVM_MC_MCBJECTFILEINFO_H #define LLVM_MC_MCBJECTFILEINFO_H -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/SectionKind.h" +#include "llvm/Support/CodeGen.h" namespace llvm { -class MCContext; -class MCSection; -class Triple; - + class MCContext; + class MCSection; + class StringRef; + class Triple; + class MCObjectFileInfo { protected: /// CommDirectiveSupportsAlignment - True if .comm supports alignment. This @@ -47,6 +46,9 @@ class MCObjectFileInfo { unsigned FDEEncoding; unsigned FDECFIEncoding; unsigned TTypeEncoding; + // Section flags for eh_frame + unsigned EHSectionType; + unsigned EHSectionFlags; /// TextSection - Section directive for standard text. /// @@ -82,13 +84,20 @@ class MCObjectFileInfo { /// this is the section to emit them into. const MCSection *CompactUnwindSection; + /// DwarfAccelNamesSection, DwarfAccelObjCSection + /// If we use the DWARF accelerated hash tables then we want toe emit these + /// sections. 
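
InitMCInstrInfo above now receives a per-opcode index array plus a single string pool in place of the per-record Name pointer dropped from MCInstrDesc, and getName simply offsets into that pool. A self-contained sketch of the layout with three fake opcodes:

    #include <cassert>

    // One flat pool of NUL-separated names...
    static const char InstrNameData[] = "PHI\0ADD\0RET";
    // ...and one offset per opcode into that pool.
    static const unsigned InstrNameIndices[] = { 0, 4, 8 };
    static const unsigned NumOpcodes = 3;

    static const char *getName(unsigned Opcode) {
      assert(Opcode < NumOpcodes && "Invalid opcode!");
      return &InstrNameData[InstrNameIndices[Opcode]];
    }

    // getName(1) yields "ADD", getName(2) yields "RET".
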
+ const MCSection *DwarfAccelNamesSection; + const MCSection *DwarfAccelObjCSection; + const MCSection *DwarfAccelNamespaceSection; + const MCSection *DwarfAccelTypesSection; + // Dwarf sections for debug info. If a target supports debug info, these must // be set. const MCSection *DwarfAbbrevSection; const MCSection *DwarfInfoSection; const MCSection *DwarfLineSection; const MCSection *DwarfFrameSection; - const MCSection *DwarfPubNamesSection; const MCSection *DwarfPubTypesSection; const MCSection *DwarfDebugInlineSection; const MCSection *DwarfStrSection; @@ -102,7 +111,7 @@ class MCObjectFileInfo { const MCSection *TLSExtraDataSection; /// TLSDataSection - Section directive for Thread Local data. - /// ELF and MachO only. + /// ELF, MachO and COFF. const MCSection *TLSDataSection; // Defaults to ".tdata". /// TLSBSSSection - Section directive for Thread Local uninitialized data. @@ -156,7 +165,7 @@ class MCObjectFileInfo { const MCSection *DrectveSection; const MCSection *PDataSection; const MCSection *XDataSection; - + public: void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, MCContext &ctx); @@ -181,17 +190,26 @@ class MCObjectFileInfo { const MCSection *getTextSection() const { return TextSection; } const MCSection *getDataSection() const { return DataSection; } const MCSection *getBSSSection() const { return BSSSection; } - const MCSection *getStaticCtorSection() const { return StaticCtorSection; } - const MCSection *getStaticDtorSection() const { return StaticDtorSection; } const MCSection *getLSDASection() const { return LSDASection; } const MCSection *getCompactUnwindSection() const{ return CompactUnwindSection; } + const MCSection *getDwarfAccelNamesSection() const { + return DwarfAccelNamesSection; + } + const MCSection *getDwarfAccelObjCSection() const { + return DwarfAccelObjCSection; + } + const MCSection *getDwarfAccelNamespaceSection() const { + return DwarfAccelNamespaceSection; + } + const MCSection *getDwarfAccelTypesSection() const { + return DwarfAccelTypesSection; + } const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; } const MCSection *getDwarfLineSection() const { return DwarfLineSection; } const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; } - const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;} const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;} const MCSection *getDwarfDebugInlineSection() const { return DwarfDebugInlineSection; diff --git a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h index f897e64f4456..a69075ddd002 100644 --- a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h @@ -34,6 +34,8 @@ class MCObjectStreamer : public MCStreamer { MCSectionData *CurSectionData; virtual void EmitInstToData(const MCInst &Inst) = 0; + virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame); + virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame); protected: MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, @@ -70,14 +72,15 @@ class MCObjectStreamer : public MCStreamer { virtual void ChangeSection(const MCSection *Section); virtual void EmitInstruction(const MCInst &Inst); virtual void EmitInstToFragment(const MCInst &Inst); - virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value); + virtual bool EmitValueToOffset(const MCExpr *Offset, 
unsigned char Value); virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, const MCSymbol *Label, unsigned PointerSize); virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, const MCSymbol *Label); - virtual void Finish(); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void FinishImpl(); /// @} }; diff --git a/contrib/llvm/include/llvm/MC/MCObjectWriter.h b/contrib/llvm/include/llvm/MC/MCObjectWriter.h index 782d844598b4..6e44e6ceffa3 100644 --- a/contrib/llvm/include/llvm/MC/MCObjectWriter.h +++ b/contrib/llvm/include/llvm/MC/MCObjectWriter.h @@ -10,7 +10,6 @@ #ifndef LLVM_MC_MCOBJECTWRITER_H #define LLVM_MC_MCOBJECTWRITER_H -#include "llvm/ADT/Triple.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/DataTypes.h" #include @@ -20,11 +19,9 @@ class MCAsmLayout; class MCAssembler; class MCFixup; class MCFragment; -class MCSymbol; class MCSymbolData; class MCSymbolRefExpr; class MCValue; -class raw_ostream; /// MCObjectWriter - Defines the object file and target independent interfaces /// used by the assembler backend to write native file format object files. @@ -188,11 +185,10 @@ class MCObjectWriter { /// Utility function to encode a SLEB128 value. static void EncodeSLEB128(int64_t Value, raw_ostream &OS); /// Utility function to encode a ULEB128 value. - static void EncodeULEB128(uint64_t Value, raw_ostream &OS); + static void EncodeULEB128(uint64_t Value, raw_ostream &OS, + unsigned Padding = 0); }; -MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit); - } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h index 9bbb75581c25..ac04483ccf16 100644 --- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -71,6 +71,7 @@ class AsmToken { bool isNot(TokenKind K) const { return Kind != K; } SMLoc getLoc() const; + SMLoc getEndLoc() const; /// getStringContents - Get the contents of a string token (without quotes). StringRef getStringContents() const { diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h index 6ff175349e43..793c7097ba14 100644 --- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -11,6 +11,7 @@ #define LLVM_MC_MCASMPARSER_H #include "llvm/Support/DataTypes.h" +#include "llvm/ADT/ArrayRef.h" namespace llvm { class AsmToken; @@ -22,6 +23,7 @@ class MCExpr; class MCStreamer; class MCTargetAsmParser; class SMLoc; +class SMRange; class SourceMgr; class StringRef; class Twine; @@ -62,6 +64,9 @@ class MCAsmParser { MCTargetAsmParser &getTargetParser() const { return *TargetParser; } void setTargetParser(MCTargetAsmParser &P); + virtual unsigned getAssemblerDialect() { return 0;} + virtual void setAssemblerDialect(unsigned i) { } + bool getShowParsedOperands() const { return ShowParsedOperands; } void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; } @@ -72,14 +77,16 @@ class MCAsmParser { /// Msg. /// /// \return The return value is true, if warnings are fatal. - virtual bool Warning(SMLoc L, const Twine &Msg) = 0; + virtual bool Warning(SMLoc L, const Twine &Msg, + ArrayRef Ranges = ArrayRef()) = 0; /// Error - Emit an error at the location \arg L, with the message \arg /// Msg. /// /// \return The return value is always true, as an idiomatic convenience to /// clients. 
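
EncodeULEB128 in MCObjectWriter.h above (and EmitULEB128IntValue in MCStreamer.h further down) gains a Padding parameter whose semantics the diff does not spell out, so the sketch below sticks to the base ULEB128 encoding itself: seven value bits per byte, high bit set on every byte except the last.

    #include <cstdint>
    #include <vector>

    static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }

    // encodeULEB128(624485, Out) appends 0xE5 0x8E 0x26.
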
- virtual bool Error(SMLoc L, const Twine &Msg) = 0; + virtual bool Error(SMLoc L, const Twine &Msg, + ArrayRef Ranges = ArrayRef()) = 0; /// Lex - Get the next AsmToken in the stream, possibly handling file /// inclusion first. @@ -89,7 +96,8 @@ class MCAsmParser { const AsmToken &getTok(); /// \brief Report an error at the current lexer location. - bool TokError(const Twine &Msg); + bool TokError(const Twine &Msg, + ArrayRef Ranges = ArrayRef()); /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \arg Res to the identifier contents. diff --git a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h index ada5ae80af0c..27acf2f2cc21 100644 --- a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h @@ -17,6 +17,7 @@ #define LLVM_MC_MCREGISTERINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" #include namespace llvm { @@ -24,28 +25,18 @@ namespace llvm { /// MCRegisterClass - Base class of TargetRegisterClass. class MCRegisterClass { public: - typedef const unsigned* iterator; - typedef const unsigned* const_iterator; -private: - unsigned ID; + typedef const uint16_t* iterator; + typedef const uint16_t* const_iterator; + const char *Name; - const unsigned RegSize, Alignment; // Size & Alignment of register in bytes - const int CopyCost; + const iterator RegsBegin; + const uint8_t *const RegSet; + const uint16_t RegsSize; + const uint16_t RegSetSize; + const uint16_t ID; + const uint16_t RegSize, Alignment; // Size & Alignment of register in bytes + const int8_t CopyCost; const bool Allocatable; - const iterator RegsBegin, RegsEnd; - const unsigned char *const RegSet; - const unsigned RegSetSize; -public: - MCRegisterClass(unsigned id, const char *name, - unsigned RS, unsigned Al, int CC, bool Allocable, - iterator RB, iterator RE, const unsigned char *Bits, - unsigned NumBytes) - : ID(id), Name(name), RegSize(RS), Alignment(Al), CopyCost(CC), - Allocatable(Allocable), RegsBegin(RB), RegsEnd(RE), RegSet(Bits), - RegSetSize(NumBytes) { - for (iterator i = RegsBegin; i != RegsEnd; ++i) - assert(contains(*i) && "Bit field corrupted."); - } /// getID() - Return the register class ID number. /// @@ -58,11 +49,11 @@ class MCRegisterClass { /// begin/end - Return all of the registers in this class. /// iterator begin() const { return RegsBegin; } - iterator end() const { return RegsEnd; } + iterator end() const { return RegsBegin + RegsSize; } /// getNumRegs - Return the number of registers in this class. /// - unsigned getNumRegs() const { return (unsigned)(RegsEnd-RegsBegin); } + unsigned getNumRegs() const { return RegsSize; } /// getRegister - Return the specified register in the class. /// @@ -115,10 +106,10 @@ class MCRegisterClass { /// of AX. 
/// struct MCRegisterDesc { - const char *Name; // Printable name for the reg (for debugging) - const unsigned *Overlaps; // Overlapping registers, described above - const unsigned *SubRegs; // Sub-register set, described above - const unsigned *SuperRegs; // Super-register set, described above + const char *Name; // Printable name for the reg (for debugging) + uint32_t Overlaps; // Overlapping registers, described above + uint32_t SubRegs; // Sub-register set, described above + uint32_t SuperRegs; // Super-register set, described above }; /// MCRegisterInfo base class - We assume that the target defines a static @@ -136,50 +127,82 @@ struct MCRegisterDesc { class MCRegisterInfo { public: typedef const MCRegisterClass *regclass_iterator; + + /// DwarfLLVMRegPair - Emitted by tablegen so Dwarf<->LLVM reg mappings can be + /// performed with a binary search. + struct DwarfLLVMRegPair { + unsigned FromReg; + unsigned ToReg; + + bool operator<(DwarfLLVMRegPair RHS) const { return FromReg < RHS.FromReg; } + }; private: const MCRegisterDesc *Desc; // Pointer to the descriptor array unsigned NumRegs; // Number of entries in the array unsigned RAReg; // Return address register const MCRegisterClass *Classes; // Pointer to the regclass array unsigned NumClasses; // Number of entries in the array - DenseMap L2DwarfRegs; // LLVM to Dwarf regs mapping - DenseMap EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH - DenseMap Dwarf2LRegs; // Dwarf to LLVM regs mapping - DenseMap EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH + const uint16_t *RegLists; // Pointer to the reglists array + const uint16_t *SubRegIndices; // Pointer to the subreg lookup + // array. + unsigned NumSubRegIndices; // Number of subreg indices. + + unsigned L2DwarfRegsSize; + unsigned EHL2DwarfRegsSize; + unsigned Dwarf2LRegsSize; + unsigned EHDwarf2LRegsSize; + const DwarfLLVMRegPair *L2DwarfRegs; // LLVM to Dwarf regs mapping + const DwarfLLVMRegPair *EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH + const DwarfLLVMRegPair *Dwarf2LRegs; // Dwarf to LLVM regs mapping + const DwarfLLVMRegPair *EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH DenseMap L2SEHRegs; // LLVM to SEH regs mapping public: /// InitMCRegisterInfo - Initialize MCRegisterInfo, called by TableGen /// auto-generated routines. *DO NOT USE*. void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA, - const MCRegisterClass *C, unsigned NC) { + const MCRegisterClass *C, unsigned NC, + const uint16_t *RL, + const uint16_t *SubIndices, + unsigned NumIndices) { Desc = D; NumRegs = NR; RAReg = RA; Classes = C; + RegLists = RL; NumClasses = NC; + SubRegIndices = SubIndices; + NumSubRegIndices = NumIndices; } - /// mapLLVMRegToDwarfReg - Used to initialize LLVM register to Dwarf + /// mapLLVMRegsToDwarfRegs - Used to initialize LLVM register to Dwarf /// register number mapping. Called by TableGen auto-generated routines. /// *DO NOT USE*. - void mapLLVMRegToDwarfReg(unsigned LLVMReg, int DwarfReg, bool isEH) { - if (isEH) - EHL2DwarfRegs[LLVMReg] = DwarfReg; - else - L2DwarfRegs[LLVMReg] = DwarfReg; + void mapLLVMRegsToDwarfRegs(const DwarfLLVMRegPair *Map, unsigned Size, + bool isEH) { + if (isEH) { + EHL2DwarfRegs = Map; + EHL2DwarfRegsSize = Size; + } else { + L2DwarfRegs = Map; + L2DwarfRegsSize = Size; + } } - - /// mapDwarfRegToLLVMReg - Used to initialize Dwarf register to LLVM + + /// mapDwarfRegsToLLVMRegs - Used to initialize Dwarf register to LLVM /// register number mapping. Called by TableGen auto-generated routines. /// *DO NOT USE*. 
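
MCRegisterDesc now stores 32-bit offsets into one shared RegLists pool instead of three pointers per register; the accessors rewritten a little further down turn an offset back into a zero-terminated uint16_t list, and getAliasSet skips the first entry because each overlap list begins with the register itself. A toy standalone version of that layout:

    #include <cstdint>

    // Shared pool of zero-terminated register lists (fake register numbers).
    static const uint16_t RegLists[] = {
      /* offset 0: overlaps of R5, starting with R5 itself */ 5, 7, 0,
      /* offset 3: overlaps of R7, starting with R7 itself */ 7, 5, 0,
    };

    struct ToyRegDesc { uint32_t Overlaps; }; // offset into RegLists

    static const ToyRegDesc Descs[] = { { 0 }, { 3 } };

    static const uint16_t *getOverlaps(unsigned I) {
      return RegLists + Descs[I].Overlaps;
    }
    static const uint16_t *getAliasSet(unsigned I) {
      return getOverlaps(I) + 1; // drop the register itself
    }
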
- void mapDwarfRegToLLVMReg(unsigned DwarfReg, unsigned LLVMReg, bool isEH) { - if (isEH) - EHDwarf2LRegs[DwarfReg] = LLVMReg; - else - Dwarf2LRegs[DwarfReg] = LLVMReg; + void mapDwarfRegsToLLVMRegs(const DwarfLLVMRegPair *Map, unsigned Size, + bool isEH) { + if (isEH) { + EHDwarf2LRegs = Map; + EHDwarf2LRegsSize = Size; + } else { + Dwarf2LRegs = Map; + Dwarf2LRegsSize = Size; + } } - + /// mapLLVMRegToSEHReg - Used to initialize LLVM register to SEH register /// number mapping. By default the SEH register number is just the same /// as the LLVM register number. @@ -212,9 +235,9 @@ class MCRegisterInfo { /// register, or a null list of there are none. The list returned is zero /// terminated. /// - const unsigned *getAliasSet(unsigned RegNo) const { + const uint16_t *getAliasSet(unsigned RegNo) const { // The Overlaps set always begins with Reg itself. - return get(RegNo).Overlaps + 1; + return RegLists + get(RegNo).Overlaps + 1; } /// getOverlaps - Return a list of registers that overlap Reg, including @@ -222,8 +245,8 @@ class MCRegisterInfo { /// list. /// These are exactly the registers in { x | regsOverlap(x, Reg) }. /// - const unsigned *getOverlaps(unsigned RegNo) const { - return get(RegNo).Overlaps; + const uint16_t *getOverlaps(unsigned RegNo) const { + return RegLists + get(RegNo).Overlaps; } /// getSubRegisters - Return the list of registers that are sub-registers of @@ -231,8 +254,35 @@ class MCRegisterInfo { /// returned is zero terminated and sorted according to super-sub register /// relations. e.g. X86::RAX's sub-register list is EAX, AX, AL, AH. /// - const unsigned *getSubRegisters(unsigned RegNo) const { - return get(RegNo).SubRegs; + const uint16_t *getSubRegisters(unsigned RegNo) const { + return RegLists + get(RegNo).SubRegs; + } + + /// getSubReg - Returns the physical register number of sub-register "Index" + /// for physical register RegNo. Return zero if the sub-register does not + /// exist. + unsigned getSubReg(unsigned Reg, unsigned Idx) const { + return *(SubRegIndices + (Reg - 1) * NumSubRegIndices + Idx - 1); + } + + /// getMatchingSuperReg - Return a super-register of the specified register + /// Reg so its sub-register of index SubIdx is Reg. + unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const MCRegisterClass *RC) const { + for (const uint16_t *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs) + if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR)) + return SR; + return 0; + } + + /// getSubRegIndex - For a given register pair, return the sub-register index + /// if the second register is a sub-register of the first. Return zero + /// otherwise. + unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const { + for (unsigned I = 1; I <= NumSubRegIndices; ++I) + if (getSubReg(RegNo, I) == SubRegNo) + return I; + return 0; } /// getSuperRegisters - Return the list of registers that are super-registers @@ -240,8 +290,8 @@ class MCRegisterInfo { /// returned is zero terminated and sorted according to super-sub register /// relations. e.g. X86::AL's super-register list is AX, EAX, RAX. /// - const unsigned *getSuperRegisters(unsigned RegNo) const { - return get(RegNo).SuperRegs; + const uint16_t *getSuperRegisters(unsigned RegNo) const { + return RegLists + get(RegNo).SuperRegs; } /// getName - Return the human-readable symbolic target-specific name for the @@ -261,22 +311,26 @@ class MCRegisterInfo { /// parameter allows targets to use different numberings for EH info and /// debugging info. 
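
The Dwarf<->LLVM register maps installed above are sorted DwarfLLVMRegPair arrays, and getDwarfRegNum/getLLVMRegNum just below search them with std::lower_bound instead of a DenseMap. A minimal standalone version of that lookup with fabricated table contents:

    #include <algorithm>

    struct RegPair {
      unsigned FromReg;
      unsigned ToReg;
      bool operator<(RegPair RHS) const { return FromReg < RHS.FromReg; }
    };

    // Must be sorted by FromReg for the binary search to be valid.
    static const RegPair Map[] = { { 1, 10 }, { 4, 11 }, { 9, 12 } };
    static const unsigned MapSize = sizeof(Map) / sizeof(Map[0]);

    static int lookup(unsigned FromReg) {
      RegPair Key = { FromReg, 0 };
      const RegPair *I = std::lower_bound(Map, Map + MapSize, Key);
      if (I == Map + MapSize || I->FromReg != FromReg)
        return -1; // no mapping, mirroring getDwarfRegNum
      return (int)I->ToReg;
    }

    // lookup(4) == 11; lookup(5) == -1.
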
int getDwarfRegNum(unsigned RegNum, bool isEH) const { - const DenseMap &M = isEH ? EHL2DwarfRegs : L2DwarfRegs; - const DenseMap::const_iterator I = M.find(RegNum); - if (I == M.end()) return -1; - return I->second; + const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; + unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + if (I == M+Size || I->FromReg != RegNum) + return -1; + return I->ToReg; } /// getLLVMRegNum - Map a dwarf register back to a target register. /// int getLLVMRegNum(unsigned RegNum, bool isEH) const { - const DenseMap &M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; - const DenseMap::const_iterator I = M.find(RegNum); - if (I == M.end()) { - assert(0 && "Invalid RegNum"); - return -1; - } - return I->second; + const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; + unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum"); + return I->ToReg; } /// getSEHRegNum - Map a target register to an equivalent SEH register @@ -301,7 +355,7 @@ class MCRegisterInfo { return Classes[i]; } }; - + } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/MC/MCSection.h b/contrib/llvm/include/llvm/MC/MCSection.h index 57008177b6d3..7da6534b6e88 100644 --- a/contrib/llvm/include/llvm/MC/MCSection.h +++ b/contrib/llvm/include/llvm/MC/MCSection.h @@ -14,12 +14,10 @@ #ifndef LLVM_MC_MCSECTION_H #define LLVM_MC_MCSECTION_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" namespace llvm { - class MCContext; class MCAsmInfo; class raw_ostream; diff --git a/contrib/llvm/include/llvm/MC/MCSectionCOFF.h b/contrib/llvm/include/llvm/MC/MCSectionCOFF.h index b154cf59d106..7eacde57f48f 100644 --- a/contrib/llvm/include/llvm/MC/MCSectionCOFF.h +++ b/contrib/llvm/include/llvm/MC/MCSectionCOFF.h @@ -15,8 +15,8 @@ #define LLVM_MC_MCSECTIONCOFF_H #include "llvm/MC/MCSection.h" - #include "llvm/Support/COFF.h" +#include "llvm/ADT/StringRef.h" namespace llvm { diff --git a/contrib/llvm/include/llvm/MC/MCSectionELF.h b/contrib/llvm/include/llvm/MC/MCSectionELF.h index c82de7128202..7321ca83e897 100644 --- a/contrib/llvm/include/llvm/MC/MCSectionELF.h +++ b/contrib/llvm/include/llvm/MC/MCSectionELF.h @@ -16,6 +16,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/Support/ELF.h" +#include "llvm/ADT/StringRef.h" namespace llvm { diff --git a/contrib/llvm/include/llvm/MC/MCSectionMachO.h b/contrib/llvm/include/llvm/MC/MCSectionMachO.h index bdb17e9008b2..15eb4f4a7685 100644 --- a/contrib/llvm/include/llvm/MC/MCSectionMachO.h +++ b/contrib/llvm/include/llvm/MC/MCSectionMachO.h @@ -15,6 +15,7 @@ #define LLVM_MC_MCSECTIONMACHO_H #include "llvm/MC/MCSection.h" +#include "llvm/ADT/StringRef.h" namespace llvm { diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h index 451efbff6e3a..25956008e021 100644 --- a/contrib/llvm/include/llvm/MC/MCStreamer.h +++ b/contrib/llvm/include/llvm/MC/MCStreamer.h @@ -23,7 +23,6 @@ namespace llvm { class MCAsmBackend; - class MCAsmInfo; class MCCodeEmitter; class MCContext; class MCExpr; @@ -32,7 +31,6 @@ namespace llvm { class MCSection; class MCSymbol; class StringRef; - class TargetLoweringObjectFile; class Twine; class raw_ostream; class formatted_raw_ostream; @@ -94,6 +92,10 
@@ namespace llvm { const MCExpr *ForceExpAbs(const MCExpr* Expr); + void RecordProcStart(MCDwarfFrameInfo &Frame); + virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame); + void RecordProcEnd(MCDwarfFrameInfo &Frame); + virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame); void EmitFrames(bool usingCFI); MCWin64EHUnwindInfo *getCurrentW64UnwindInfo(){return CurrentW64UnwindInfo;} @@ -334,6 +336,11 @@ namespace llvm { /// EndCOFFSymbolDef - Marks the end of the symbol definition. virtual void EndCOFFSymbolDef() = 0; + /// EmitCOFFSecRel32 - Emits a COFF section relative relocation. + /// + /// @param Symbol - Symbol the section relative realocation should point to. + virtual void EmitCOFFSecRel32(MCSymbol const *Symbol); + /// EmitELFSize - Emit an ELF .size directive. /// /// This corresponds to an assembler statement such as: @@ -420,7 +427,8 @@ namespace llvm { /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. - void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0); + void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0, + unsigned Padding = 0); /// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. @@ -431,6 +439,13 @@ namespace llvm { void EmitSymbolValue(const MCSymbol *Sym, unsigned Size, unsigned AddrSpace = 0); + /// EmitGPRel64Value - Emit the expression @p Value into the output as a + /// gprel64 (64-bit GP relative) value. + /// + /// This is used to implement assembler directives such as .gpdword on + /// targets that support them. + virtual void EmitGPRel64Value(const MCExpr *Value); + /// EmitGPRel32Value - Emit the expression @p Value into the output as a /// gprel32 (32-bit GP relative) value. /// @@ -493,7 +508,8 @@ namespace llvm { /// @param Offset - The offset to reach. This may be an expression, but the /// expression must be associated with the current section. /// @param Value - The value to use when filling bytes. - virtual void EmitValueToOffset(const MCExpr *Offset, + /// @return false on success, true if the offset was invalid. + virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) = 0; /// @} @@ -505,7 +521,8 @@ namespace llvm { /// EmitDwarfFileDirective - Associate a filename with a specified logical /// file number. This implements the DWARF2 '.file 4 "foo.c"' assembler /// directive. - virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename); + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, + StringRef Filename); /// EmitDwarfLocDirective - This implements the DWARF2 // '.loc fileno lineno ...' assembler directive. 
@@ -529,8 +546,8 @@ namespace llvm { virtual void EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding); virtual void EmitCFISections(bool EH, bool Debug); - virtual void EmitCFIStartProc(); - virtual void EmitCFIEndProc(); + void EmitCFIStartProc(); + void EmitCFIEndProc(); virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset); virtual void EmitCFIDefCfaOffset(int64_t Offset); virtual void EmitCFIDefCfaRegister(int64_t Register); @@ -540,8 +557,11 @@ namespace llvm { virtual void EmitCFIRememberState(); virtual void EmitCFIRestoreState(); virtual void EmitCFISameValue(int64_t Register); + virtual void EmitCFIRestore(int64_t Register); virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset); virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment); + virtual void EmitCFIEscape(StringRef Values); + virtual void EmitCFISignalFrame(); virtual void EmitWin64EHStartProc(const MCSymbol *Symbol); virtual void EmitWin64EHEndProc(); @@ -581,8 +601,10 @@ namespace llvm { virtual void EmitRegSave(const SmallVectorImpl &RegList, bool isVector); + /// FinishImpl - Streamer specific finalization. + virtual void FinishImpl() = 0; /// Finish - Finish emission of machine code. - virtual void Finish() = 0; + void Finish(); }; /// createNullStreamer - Create a dummy machine code streamer, which does @@ -613,6 +635,7 @@ namespace llvm { bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, MCInstPrinter *InstPrint = 0, MCCodeEmitter *CE = 0, MCAsmBackend *TAB = 0, @@ -638,14 +661,8 @@ namespace llvm { /// createELFStreamer - Create a machine code streamer which will generate /// ELF format object files. MCStreamer *createELFStreamer(MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll, bool NoExecStack); - - /// createLoggingStreamer - Create a machine code streamer which just logs the - /// API calls and then dispatches to another streamer. - /// - /// The new streamer takes ownership of the \arg Child. - MCStreamer *createLoggingStreamer(MCStreamer *Child, raw_ostream &OS); + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll, bool NoExecStack); /// createPureStreamer - Create a machine code streamer which will generate /// "pure" MC object files, for use with MC-JIT and testing tools. diff --git a/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h new file mode 100644 index 000000000000..7a0b1ffaf0a0 --- /dev/null +++ b/contrib/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h @@ -0,0 +1,36 @@ +//===-- llvm/MC/MCWinCOFFObjectWriter.h - Win COFF Object Writer *- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCWINCOFFOBJECTWRITER_H +#define LLVM_MC_MCWINCOFFOBJECTWRITER_H + +namespace llvm { + class MCWinCOFFObjectTargetWriter { + const unsigned Machine; + + protected: + MCWinCOFFObjectTargetWriter(unsigned Machine_); + + public: + virtual ~MCWinCOFFObjectTargetWriter() {} + + unsigned getMachine() const { return Machine; } + virtual unsigned getRelocType(unsigned FixupKind) const = 0; + }; + + /// \brief Construct a new Win COFF writer instance. + /// + /// \param MOTW - The target specific WinCOFF writer subclass. + /// \param OS - The stream to write to. + /// \returns The constructed object writer. 
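
The new MCWinCOFFObjectWriter.h above expects each target to subclass MCWinCOFFObjectTargetWriter and translate fixup kinds into COFF relocation types, with the resulting writer handed to createWinCOFFObjectWriter. A sketch of that subclassing pattern against a locally re-declared copy of the interface; the fixup-kind and relocation-type numbers are placeholders invented for the sketch:

    // Local stand-in mirroring MCWinCOFFObjectTargetWriter from the new header.
    class ToyWinCOFFTargetWriter {
      const unsigned Machine;
    protected:
      explicit ToyWinCOFFTargetWriter(unsigned Machine_) : Machine(Machine_) {}
    public:
      virtual ~ToyWinCOFFTargetWriter() {}
      unsigned getMachine() const { return Machine; }
      virtual unsigned getRelocType(unsigned FixupKind) const = 0;
    };

    // Hypothetical target writer; 0x14c is the i386 COFF machine value, but
    // the switch cases are made-up numbers standing in for real kinds/types.
    class ToyI386WinCOFFWriter : public ToyWinCOFFTargetWriter {
    public:
      ToyI386WinCOFFWriter() : ToyWinCOFFTargetWriter(0x14c) {}
      virtual unsigned getRelocType(unsigned FixupKind) const {
        switch (FixupKind) {
        case 1:  return 6; // e.g. a 4-byte absolute fixup -> DIR32-style reloc
        default: return 0;
        }
      }
    };
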
+ MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, + raw_ostream &OS); +} // End llvm namespace + +#endif diff --git a/contrib/llvm/include/llvm/Metadata.h b/contrib/llvm/include/llvm/Metadata.h index 887e33c7a181..73579861ec41 100644 --- a/contrib/llvm/include/llvm/Metadata.h +++ b/contrib/llvm/include/llvm/Metadata.h @@ -36,30 +36,27 @@ template /// These are used to efficiently contain a byte sequence for metadata. /// MDString is always unnamed. class MDString : public Value { + virtual void anchor(); MDString(const MDString &); // DO NOT IMPLEMENT - StringRef Str; - explicit MDString(LLVMContext &C, StringRef S); - + explicit MDString(LLVMContext &C); public: static MDString *get(LLVMContext &Context, StringRef Str); static MDString *get(LLVMContext &Context, const char *Str) { return get(Context, Str ? StringRef(Str) : StringRef()); } - StringRef getString() const { return Str; } + StringRef getString() const { return getName(); } - unsigned getLength() const { return (unsigned)Str.size(); } + unsigned getLength() const { return (unsigned)getName().size(); } typedef StringRef::iterator iterator; /// begin() - Pointer to the first byte of the string. - /// - iterator begin() const { return Str.begin(); } + iterator begin() const { return getName().begin(); } /// end() - Pointer to one byte past the end of the string. - /// - iterator end() const { return Str.end(); } + iterator end() const { return getName().end(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MDString *) { return true; } @@ -78,6 +75,10 @@ class MDNode : public Value, public FoldingSetNode { void operator=(const MDNode &); // DO NOT IMPLEMENT friend class MDNodeOperand; friend class LLVMContextImpl; + friend struct FoldingSetTrait; + + /// Hash - If the MDNode is uniqued cache the hash to speed up lookup. + unsigned Hash; /// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the /// end of this MDNode. @@ -134,6 +135,9 @@ class MDNode : public Value, public FoldingSetNode { /// deleteTemporary - Deallocate a node created by getTemporary. The /// node must not have any users. static void deleteTemporary(MDNode *N); + + /// replaceOperandWith - Replace a specific operand. + void replaceOperandWith(unsigned i, Value *NewVal); /// getOperand - Return specified operand. Value *getOperand(unsigned i) const; @@ -225,6 +229,9 @@ class NamedMDNode : public ilist_node { /// print - Implement operator<< on NamedMDNode. void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const; + + /// dump() - Allow printing of NamedMDNodes from the debugger. + void dump() const; }; } // end llvm namespace diff --git a/contrib/llvm/include/llvm/Module.h b/contrib/llvm/include/llvm/Module.h index 8ce5ec4f1d14..b9c98814f159 100644 --- a/contrib/llvm/include/llvm/Module.h +++ b/contrib/llvm/include/llvm/Module.h @@ -30,8 +30,7 @@ class GVMaterializer; class LLVMContext; class StructType; template struct DenseMapInfo; -template class DenseMap; +template class DenseMap; template<> struct ilist_traits : public SymbolTableListTraits { @@ -154,6 +153,39 @@ class Module { /// An enumeration for describing the size of a pointer on the target machine. enum PointerSize { AnyPointerSize, Pointer32, Pointer64 }; + /// An enumeration for the supported behaviors of module flags. 
The following + /// module flags behavior values are supported: + /// + /// Value Behavior + /// ----- -------- + /// 1 Error + /// Emits an error if two values disagree. + /// + /// 2 Warning + /// Emits a warning if two values disagree. + /// + /// 3 Require + /// Emits an error when the specified value is not present + /// or doesn't have the specified value. It is an error for + /// two (or more) llvm.module.flags with the same ID to have + /// the Require behavior but different values. There may be + /// multiple Require flags per ID. + /// + /// 4 Override + /// Uses the specified value if the two values disagree. It + /// is an error for two (or more) llvm.module.flags with the + /// same ID to have the Override behavior but different + /// values. + enum ModFlagBehavior { Error = 1, Warning = 2, Require = 3, Override = 4 }; + + struct ModuleFlagEntry { + ModFlagBehavior Behavior; + MDString *Key; + Value *Val; + ModuleFlagEntry(ModFlagBehavior B, MDString *K, Value *V) + : Behavior(B), Key(K), Val(V) {} + }; + /// @} /// @name Member Variables /// @{ @@ -266,8 +298,8 @@ class Module { void getMDKindNames(SmallVectorImpl &Result) const; - typedef DenseMap, - DenseMapInfo > NumeredTypesMapTy; + typedef DenseMap > + NumeredTypesMapTy; /// findUsedStructTypes - Walk the entire module and find all of the /// struct types that are in use, returning them in a vector. @@ -372,6 +404,30 @@ class Module { /// and delete it. void eraseNamedMetadata(NamedMDNode *NMD); +/// @} +/// @name Module Flags Accessors +/// @{ + + /// getModuleFlagsMetadata - Returns the module flags in the provided vector. + void getModuleFlagsMetadata(SmallVectorImpl &Flags) const; + + /// getModuleFlagsMetadata - Returns the NamedMDNode in the module that + /// represents module-level flags. This method returns null if there are no + /// module-level flags. + NamedMDNode *getModuleFlagsMetadata() const; + + /// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module + /// that represents module-level flags. If module-level flags aren't found, + /// it creates the named metadata that contains them. + NamedMDNode *getOrInsertModuleFlagsMetadata(); + + /// addModuleFlag - Add a module-level flag to the module-level flags + /// metadata. It will create the module-level flags named metadata if it + /// doesn't already exist. 
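As a rough usage sketch of the module-flag behaviors documented above and the addModuleFlag overloads declared immediately below, the following stand-alone example records one flag with Error semantics and prints the resulting llvm.module.flags node; the key "example-flag" and the program scaffolding are assumptions made only for illustration:

    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Module.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("flags-demo", Ctx);
      // Behavior 1 (Module::Error): an error is emitted if two modules
      // carrying this key disagree on its value.
      M.addModuleFlag(Module::Error, "example-flag", 1);
      // The flags are stored in the llvm.module.flags named metadata node.
      if (NamedMDNode *Flags = M.getModuleFlagsMetadata())
        Flags->dump();
      return 0;
    }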
+ void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Value *Val); + void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val); + void addModuleFlag(MDNode *Node); + /// @} /// @name Materialization /// @{ diff --git a/contrib/llvm/include/llvm/Object/Archive.h b/contrib/llvm/include/llvm/Object/Archive.h index 4f081206c5bc..358b27a416cd 100644 --- a/contrib/llvm/include/llvm/Object/Archive.h +++ b/contrib/llvm/include/llvm/Object/Archive.h @@ -22,6 +22,7 @@ namespace llvm { namespace object { class Archive : public Binary { + virtual void anchor(); public: class Child { const Archive *Parent; @@ -34,6 +35,10 @@ class Archive : public Binary { return (Parent == other.Parent) && (Data.begin() == other.Data.begin()); } + bool operator <(const Child &other) const { + return Data.begin() < other.Data.begin(); + } + Child getNext() const; error_code getName(StringRef &Result) const; int getLastModified() const; @@ -50,6 +55,7 @@ class Archive : public Binary { class child_iterator { Child child; public: + child_iterator() : child(Child(0, StringRef())) {} child_iterator(const Child &c) : child(c) {} const Child* operator->() const { return &child; @@ -63,24 +69,73 @@ class Archive : public Binary { return !(*this == other); } + bool operator <(const child_iterator &other) const { + return child < other.child; + } + child_iterator& operator++() { // Preincrement child = child.getNext(); return *this; } }; + class Symbol { + const Archive *Parent; + uint32_t SymbolIndex; + uint32_t StringIndex; // Extra index to the string. + + public: + bool operator ==(const Symbol &other) const { + return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex); + } + + Symbol(const Archive *p, uint32_t symi, uint32_t stri) + : Parent(p) + , SymbolIndex(symi) + , StringIndex(stri) {} + error_code getName(StringRef &Result) const; + error_code getMember(child_iterator &Result) const; + Symbol getNext() const; + }; + + class symbol_iterator { + Symbol symbol; + public: + symbol_iterator(const Symbol &s) : symbol(s) {} + const Symbol *operator->() const { + return &symbol; + } + + bool operator==(const symbol_iterator &other) const { + return symbol == other.symbol; + } + + bool operator!=(const symbol_iterator &other) const { + return !(*this == other); + } + + symbol_iterator& operator++() { // Preincrement + symbol = symbol.getNext(); + return *this; + } + }; + Archive(MemoryBuffer *source, error_code &ec); - child_iterator begin_children() const; + child_iterator begin_children(bool skip_internal = true) const; child_iterator end_children() const; + symbol_iterator begin_symbols() const; + symbol_iterator end_symbols() const; + // Cast methods. static inline bool classof(Archive const *v) { return true; } static inline bool classof(Binary const *v) { - return v->getType() == Binary::isArchive; + return v->isArchive(); } private: + child_iterator SymbolTable; child_iterator StringTable; }; diff --git a/contrib/llvm/include/llvm/Object/Binary.h b/contrib/llvm/include/llvm/Object/Binary.h index cd092fd8e485..77a08d597c4d 100644 --- a/contrib/llvm/include/llvm/Object/Binary.h +++ b/contrib/llvm/include/llvm/Object/Binary.h @@ -37,16 +37,25 @@ class Binary { Binary(unsigned int Type, MemoryBuffer *Source); enum { - isArchive, - + ID_Archive, // Object and children. 
- isObject, - isCOFF, - isELF, - isMachO, - lastObject + ID_StartObjects, + ID_COFF, + ID_ELF32L, // ELF 32-bit, little endian + ID_ELF32B, // ELF 32-bit, big endian + ID_ELF64L, // ELF 64-bit, little endian + ID_ELF64B, // ELF 64-bit, big endian + ID_MachO, + ID_EndObjects }; + static inline unsigned int getELFType(bool isLittleEndian, bool is64Bits) { + if (isLittleEndian) + return is64Bits ? ID_ELF64L : ID_ELF32L; + else + return is64Bits ? ID_ELF64B : ID_ELF32B; + } + public: virtual ~Binary(); @@ -56,9 +65,37 @@ class Binary { // Cast methods. unsigned int getType() const { return TypeID; } static inline bool classof(const Binary *v) { return true; } + + // Convenience methods + bool isObject() const { + return TypeID > ID_StartObjects && TypeID < ID_EndObjects; + } + + bool isArchive() const { + return TypeID == ID_Archive; + } + + bool isELF() const { + return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B; + } + + bool isMachO() const { + return TypeID == ID_MachO; + } + + bool isCOFF() const { + return TypeID == ID_COFF; + } }; +/// @brief Create a Binary from Source, autodetecting the file type. +/// +/// @param Source The data to create the Binary from. Ownership is transfered +/// to Result if successful. If an error is returned, Source is destroyed +/// by createBinary before returning. +/// @param Result A pointer to the resulting Binary if no error occured. error_code createBinary(MemoryBuffer *Source, OwningPtr &Result); + error_code createBinary(StringRef Path, OwningPtr &Result); } diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h index 067bcd471ae9..68b5ca1bc781 100644 --- a/contrib/llvm/include/llvm/Object/COFF.h +++ b/contrib/llvm/include/llvm/Object/COFF.h @@ -19,6 +19,9 @@ #include "llvm/Support/Endian.h" namespace llvm { + template + class ArrayRef; + namespace object { struct coff_file_header { @@ -45,13 +48,18 @@ struct coff_symbol { support::ulittle32_t Value; support::little16_t SectionNumber; - struct { - support::ulittle8_t BaseType; - support::ulittle8_t ComplexType; - } Type; + support::ulittle16_t Type; support::ulittle8_t StorageClass; support::ulittle8_t NumberOfAuxSymbols; + + uint8_t getBaseType() const { + return Type & 0x0F; + } + + uint8_t getComplexType() const { + return (Type & 0xF0) >> 4; + } }; struct coff_section { @@ -73,6 +81,16 @@ struct coff_relocation { support::ulittle16_t Type; }; +struct coff_aux_section_definition { + support::ulittle32_t Length; + support::ulittle16_t NumberOfRelocations; + support::ulittle16_t NumberOfLinenumbers; + support::ulittle32_t CheckSum; + support::ulittle16_t Number; + support::ulittle8_t Selection; + char Unused[3]; +}; + class COFFObjectFile : public ObjectFile { private: const coff_file_header *Header; @@ -81,11 +99,7 @@ class COFFObjectFile : public ObjectFile { const char *StringTable; uint32_t StringTableSize; - error_code getSection(int32_t index, - const coff_section *&Res) const; error_code getString(uint32_t offset, StringRef &Res) const; - error_code getSymbol(uint32_t index, - const coff_symbol *&Res) const; const coff_symbol *toSymb(DataRefImpl Symb) const; const coff_section *toSec(DataRefImpl Sec) const; @@ -94,13 +108,14 @@ class COFFObjectFile : public ObjectFile { protected: virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; - virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolFileOffset(DataRefImpl 
Symb, uint64_t &Res) const; virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; - virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; - virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const; - virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const; + virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const; + virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const; + virtual error_code getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const; virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; @@ -111,6 +126,10 @@ class COFFObjectFile : public ObjectFile { virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const; virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionRequiredForExecution(DataRefImpl Sec, + bool &Res) const; virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const; virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const; @@ -120,10 +139,12 @@ class COFFObjectFile : public ObjectFile { RelocationRef &Res) const; virtual error_code getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const; + virtual error_code getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const; virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const; virtual error_code getRelocationType(DataRefImpl Rel, - uint32_t &Res) const; + uint64_t &Res) const; virtual error_code getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const; virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel, @@ -131,16 +152,46 @@ class COFFObjectFile : public ObjectFile { virtual error_code getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const; + virtual error_code getLibraryNext(DataRefImpl LibData, + LibraryRef &Result) const; + virtual error_code getLibraryPath(DataRefImpl LibData, + StringRef &Result) const; + public: COFFObjectFile(MemoryBuffer *Object, error_code &ec); virtual symbol_iterator begin_symbols() const; virtual symbol_iterator end_symbols() const; + virtual symbol_iterator begin_dynamic_symbols() const; + virtual symbol_iterator end_dynamic_symbols() const; + virtual library_iterator begin_libraries_needed() const; + virtual library_iterator end_libraries_needed() const; virtual section_iterator begin_sections() const; virtual section_iterator end_sections() const; virtual uint8_t getBytesInAddress() const; virtual StringRef getFileFormatName() const; virtual unsigned getArch() const; + virtual StringRef getLoadName() const; + + error_code getHeader(const coff_file_header *&Res) const; + error_code getSection(int32_t index, const coff_section *&Res) const; + error_code getSymbol(uint32_t index, const coff_symbol *&Res) const; + template + error_code getAuxSymbol(uint32_t index, const T *&Res) const { + const coff_symbol *s; + error_code ec = getSymbol(index, s); + Res = reinterpret_cast(s); + return ec; + } + error_code getSymbolName(const coff_symbol *symbol, 
StringRef &Res) const; + error_code getSectionName(const coff_section *Sec, StringRef &Res) const; + error_code getSectionContents(const coff_section *Sec, + ArrayRef &Res) const; + + static inline bool classof(const Binary *v) { + return v->isCOFF(); + } + static inline bool classof(const COFFObjectFile *v) { return true; } }; } diff --git a/contrib/llvm/include/llvm/Object/ELF.h b/contrib/llvm/include/llvm/Object/ELF.h new file mode 100644 index 000000000000..0828985f2e9b --- /dev/null +++ b/contrib/llvm/include/llvm/Object/ELF.h @@ -0,0 +1,2209 @@ +//===- ELF.h - ELF object file implementation -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ELFObjectFile template class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_ELF_H +#define LLVM_OBJECT_ELF_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +namespace llvm { +namespace object { + +// Templates to choose Elf_Addr and Elf_Off depending on is64Bits. +template +struct ELFDataTypeTypedefHelperCommon { + typedef support::detail::packed_endian_specific_integral + Elf_Half; + typedef support::detail::packed_endian_specific_integral + Elf_Word; + typedef support::detail::packed_endian_specific_integral + Elf_Sword; + typedef support::detail::packed_endian_specific_integral + Elf_Xword; + typedef support::detail::packed_endian_specific_integral + Elf_Sxword; +}; + +template +struct ELFDataTypeTypedefHelper; + +/// ELF 32bit types. +template +struct ELFDataTypeTypedefHelper + : ELFDataTypeTypedefHelperCommon { + typedef uint32_t value_type; + typedef support::detail::packed_endian_specific_integral + Elf_Addr; + typedef support::detail::packed_endian_specific_integral + Elf_Off; +}; + +/// ELF 64bit types. +template +struct ELFDataTypeTypedefHelper + : ELFDataTypeTypedefHelperCommon{ + typedef uint64_t value_type; + typedef support::detail::packed_endian_specific_integral + Elf_Addr; + typedef support::detail::packed_endian_specific_integral + Elf_Off; +}; + +// I really don't like doing this, but the alternative is copypasta. +#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Addr Elf_Addr; \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Off Elf_Off; \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Half Elf_Half; \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Word Elf_Word; \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Sword Elf_Sword; \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Xword Elf_Xword; \ +typedef typename \ + ELFDataTypeTypedefHelper::Elf_Sxword Elf_Sxword; + + // Section header. 
+template +struct Elf_Shdr_Base; + +template +struct Elf_Shdr_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Word sh_name; // Section name (index into string table) + Elf_Word sh_type; // Section type (SHT_*) + Elf_Word sh_flags; // Section flags (SHF_*) + Elf_Addr sh_addr; // Address where section is to be loaded + Elf_Off sh_offset; // File offset of section data, in bytes + Elf_Word sh_size; // Size of section, in bytes + Elf_Word sh_link; // Section type-specific header table index link + Elf_Word sh_info; // Section type-specific extra information + Elf_Word sh_addralign;// Section address alignment + Elf_Word sh_entsize; // Size of records contained within the section +}; + +template +struct Elf_Shdr_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Word sh_name; // Section name (index into string table) + Elf_Word sh_type; // Section type (SHT_*) + Elf_Xword sh_flags; // Section flags (SHF_*) + Elf_Addr sh_addr; // Address where section is to be loaded + Elf_Off sh_offset; // File offset of section data, in bytes + Elf_Xword sh_size; // Size of section, in bytes + Elf_Word sh_link; // Section type-specific header table index link + Elf_Word sh_info; // Section type-specific extra information + Elf_Xword sh_addralign;// Section address alignment + Elf_Xword sh_entsize; // Size of records contained within the section +}; + +template +struct Elf_Shdr_Impl : Elf_Shdr_Base { + using Elf_Shdr_Base::sh_entsize; + using Elf_Shdr_Base::sh_size; + + /// @brief Get the number of entities this section contains if it has any. + unsigned getEntityCount() const { + if (sh_entsize == 0) + return 0; + return sh_size / sh_entsize; + } +}; + +template +struct Elf_Sym_Base; + +template +struct Elf_Sym_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Word st_name; // Symbol name (index into string table) + Elf_Addr st_value; // Value or address associated with the symbol + Elf_Word st_size; // Size of the symbol + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf_Half st_shndx; // Which section (header table index) it's defined in +}; + +template +struct Elf_Sym_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Word st_name; // Symbol name (index into string table) + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf_Half st_shndx; // Which section (header table index) it's defined in + Elf_Addr st_value; // Value or address associated with the symbol + Elf_Xword st_size; // Size of the symbol +}; + +template +struct Elf_Sym_Impl : Elf_Sym_Base { + using Elf_Sym_Base::st_info; + + // These accessors and mutators correspond to the ELF32_ST_BIND, + // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification: + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; + +/// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section +/// (.gnu.version). This structure is identical for ELF32 and ELF64. +template +struct Elf_Versym_Impl { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + Elf_Half vs_index; // Version index with flags (e.g. 
VERSYM_HIDDEN) +}; + +template +struct Elf_Verdaux_Impl; + +/// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section +/// (.gnu.version_d). This structure is identical for ELF32 and ELF64. +template +struct Elf_Verdef_Impl { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + typedef Elf_Verdaux_Impl Elf_Verdaux; + Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT) + Elf_Half vd_flags; // Bitwise flags (VER_DEF_*) + Elf_Half vd_ndx; // Version index, used in .gnu.version entries + Elf_Half vd_cnt; // Number of Verdaux entries + Elf_Word vd_hash; // Hash of name + Elf_Word vd_aux; // Offset to the first Verdaux entry (in bytes) + Elf_Word vd_next; // Offset to the next Verdef entry (in bytes) + + /// Get the first Verdaux entry for this Verdef. + const Elf_Verdaux *getAux() const { + return reinterpret_cast((const char*)this + vd_aux); + } +}; + +/// Elf_Verdaux: This is the structure of auxiliary data in the SHT_GNU_verdef +/// section (.gnu.version_d). This structure is identical for ELF32 and ELF64. +template +struct Elf_Verdaux_Impl { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + Elf_Word vda_name; // Version name (offset in string table) + Elf_Word vda_next; // Offset to next Verdaux entry (in bytes) +}; + +/// Elf_Verneed: This is the structure of entries in the SHT_GNU_verneed +/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64. +template +struct Elf_Verneed_Impl { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT) + Elf_Half vn_cnt; // Number of associated Vernaux entries + Elf_Word vn_file; // Library name (string table offset) + Elf_Word vn_aux; // Offset to first Vernaux entry (in bytes) + Elf_Word vn_next; // Offset to next Verneed entry (in bytes) +}; + +/// Elf_Vernaux: This is the structure of auxiliary data in SHT_GNU_verneed +/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64. +template +struct Elf_Vernaux_Impl { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + Elf_Word vna_hash; // Hash of dependency name + Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*) + Elf_Half vna_other; // Version index, used in .gnu.version entries + Elf_Word vna_name; // Dependency name + Elf_Word vna_next; // Offset to next Vernaux entry (in bytes) +}; + +/// Elf_Dyn_Base: This structure matches the form of entries in the dynamic +/// table section (.dynamic). +template +struct Elf_Dyn_Base; + +template +struct Elf_Dyn_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Sword d_tag; + union { + Elf_Word d_val; + Elf_Addr d_ptr; + } d_un; +}; + +template +struct Elf_Dyn_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Sxword d_tag; + union { + Elf_Xword d_val; + Elf_Addr d_ptr; + } d_un; +}; + +/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters. +template +struct Elf_Dyn_Impl : Elf_Dyn_Base { + using Elf_Dyn_Base::d_tag; + using Elf_Dyn_Base::d_un; + int64_t getTag() const { return d_tag; } + uint64_t getVal() const { return d_un.d_val; } + uint64_t getPtr() const { return d_un.d_ptr; } +}; + +template +class ELFObjectFile; + +// DynRefImpl: Reference to an entry in the dynamic table +// This is an ELF-specific interface.
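Because the Elf_Dyn_Impl accessors above (getTag, getVal, getPtr) and the DynRefImpl wrapper declared next expose dynamic-table entries generically, here is a small hedged sketch of the intended use; the DynT template parameter and the printDynEntry helper are names invented for this example only:

    #include "llvm/Support/ELF.h"
    #include "llvm/Support/raw_ostream.h"

    // Print one .dynamic entry through the generic getTag()/getVal() accessors.
    template <class DynT>
    static void printDynEntry(const DynT &Dyn) {
      switch (Dyn.getTag()) {
      case llvm::ELF::DT_NEEDED:
        llvm::outs() << "DT_NEEDED, strtab offset " << Dyn.getVal() << "\n";
        break;
      case llvm::ELF::DT_SONAME:
        llvm::outs() << "DT_SONAME, strtab offset " << Dyn.getVal() << "\n";
        break;
      default:
        llvm::outs() << "tag " << Dyn.getTag() << ", value " << Dyn.getVal() << "\n";
        break;
      }
    }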
+template +class DynRefImpl { + typedef Elf_Dyn_Impl Elf_Dyn; + typedef ELFObjectFile OwningType; + + DataRefImpl DynPimpl; + const OwningType *OwningObject; + +public: + DynRefImpl() : OwningObject(NULL) { } + + DynRefImpl(DataRefImpl DynP, const OwningType *Owner); + + bool operator==(const DynRefImpl &Other) const; + bool operator <(const DynRefImpl &Other) const; + + error_code getNext(DynRefImpl &Result) const; + int64_t getTag() const; + uint64_t getVal() const; + uint64_t getPtr() const; + + DataRefImpl getRawDataRefImpl() const; +}; + +// Elf_Rel: Elf Relocation +template +struct Elf_Rel_Base; + +template +struct Elf_Rel_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Word r_info; // Symbol table index and type of relocation to apply +}; + +template +struct Elf_Rel_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Xword r_info; // Symbol table index and type of relocation to apply +}; + +template +struct Elf_Rel_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Word r_info; // Symbol table index and type of relocation to apply + Elf_Sword r_addend; // Compute value for relocatable field by adding this +}; + +template +struct Elf_Rel_Base { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Addr r_offset; // Location (file byte offset, or program virtual addr) + Elf_Xword r_info; // Symbol table index and type of relocation to apply + Elf_Sxword r_addend; // Compute value for relocatable field by adding this. +}; + +template +struct Elf_Rel_Impl; + +template +struct Elf_Rel_Impl + : Elf_Rel_Base { + using Elf_Rel_Base::r_info; + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + uint64_t getSymbol() const { return (r_info >> 32); } + unsigned char getType() const { + return (unsigned char) (r_info & 0xffffffffL); + } + void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(uint64_t s, unsigned char t) { + r_info = (s << 32) + (t&0xffffffffL); + } +}; + +template +struct Elf_Rel_Impl + : Elf_Rel_Base { + using Elf_Rel_Base::r_info; + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + + // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, + // and ELF32_R_INFO macros defined in the ELF specification: + uint32_t getSymbol() const { return (r_info >> 8); } + unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); } + void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(uint32_t s, unsigned char t) { + r_info = (s << 8) + t; + } +}; + + +template +class ELFObjectFile : public ObjectFile { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + + typedef Elf_Shdr_Impl Elf_Shdr; + typedef Elf_Sym_Impl Elf_Sym; + typedef Elf_Dyn_Impl Elf_Dyn; + typedef Elf_Rel_Impl Elf_Rel; + typedef Elf_Rel_Impl Elf_Rela; + typedef Elf_Verdef_Impl Elf_Verdef; + typedef Elf_Verdaux_Impl Elf_Verdaux; + typedef Elf_Verneed_Impl Elf_Verneed; + typedef Elf_Vernaux_Impl Elf_Vernaux; + typedef Elf_Versym_Impl Elf_Versym; + typedef DynRefImpl DynRef; + typedef 
content_iterator dyn_iterator; + +protected: + struct Elf_Ehdr { + unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes + Elf_Half e_type; // Type of file (see ET_*) + Elf_Half e_machine; // Required architecture for this file (see EM_*) + Elf_Word e_version; // Must be equal to 1 + Elf_Addr e_entry; // Address to jump to in order to start program + Elf_Off e_phoff; // Program header table's file offset, in bytes + Elf_Off e_shoff; // Section header table's file offset, in bytes + Elf_Word e_flags; // Processor-specific flags + Elf_Half e_ehsize; // Size of ELF header, in bytes + Elf_Half e_phentsize;// Size of an entry in the program header table + Elf_Half e_phnum; // Number of entries in the program header table + Elf_Half e_shentsize;// Size of an entry in the section header table + Elf_Half e_shnum; // Number of entries in the section header table + Elf_Half e_shstrndx; // Section header table index of section name + // string table + bool checkMagic() const { + return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0; + } + unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } + }; + // This flag is used for classof, to distinguish ELFObjectFile from + // its subclass. If more subclasses will be created, this flag will + // have to become an enum. + bool isDyldELFObject; + +private: + typedef SmallVector Sections_t; + typedef DenseMap IndexMap_t; + typedef DenseMap > RelocMap_t; + + const Elf_Ehdr *Header; + const Elf_Shdr *SectionHeaderTable; + const Elf_Shdr *dot_shstrtab_sec; // Section header string table. + const Elf_Shdr *dot_strtab_sec; // Symbol header string table. + const Elf_Shdr *dot_dynstr_sec; // Dynamic symbol string table. + + // SymbolTableSections[0] always points to the dynamic string table section + // header, or NULL if there is no dynamic string table. + Sections_t SymbolTableSections; + IndexMap_t SymbolTableSectionsIndexMap; + DenseMap ExtendedSymbolTable; + + const Elf_Shdr *dot_dynamic_sec; // .dynamic + const Elf_Shdr *dot_gnu_version_sec; // .gnu.version + const Elf_Shdr *dot_gnu_version_r_sec; // .gnu.version_r + const Elf_Shdr *dot_gnu_version_d_sec; // .gnu.version_d + + // Pointer to SONAME entry in dynamic string table + // This is set the first time getLoadName is called. + mutable const char *dt_soname; + + // Records for each version index the corresponding Verdef or Vernaux entry. + // This is filled the first time LoadVersionMap() is called. + class VersionMapEntry : public PointerIntPair { + public: + // If the integer is 0, this is an Elf_Verdef*. + // If the integer is 1, this is an Elf_Vernaux*. + VersionMapEntry() : PointerIntPair(NULL, 0) { } + VersionMapEntry(const Elf_Verdef *verdef) + : PointerIntPair(verdef, 0) { } + VersionMapEntry(const Elf_Vernaux *vernaux) + : PointerIntPair(vernaux, 1) { } + bool isNull() const { return getPointer() == NULL; } + bool isVerdef() const { return !isNull() && getInt() == 0; } + bool isVernaux() const { return !isNull() && getInt() == 1; } + const Elf_Verdef *getVerdef() const { + return isVerdef() ? (const Elf_Verdef*)getPointer() : NULL; + } + const Elf_Vernaux *getVernaux() const { + return isVernaux() ? 
(const Elf_Vernaux*)getPointer() : NULL; + } + }; + mutable SmallVector VersionMap; + void LoadVersionDefs(const Elf_Shdr *sec) const; + void LoadVersionNeeds(const Elf_Shdr *ec) const; + void LoadVersionMap() const; + + /// @brief Map sections to an array of relocation sections that reference + /// them sorted by section index. + RelocMap_t SectionRelocMap; + + /// @brief Get the relocation section that contains \a Rel. + const Elf_Shdr *getRelSection(DataRefImpl Rel) const { + return getSection(Rel.w.b); + } + + bool isRelocationHasAddend(DataRefImpl Rel) const; + template + const T *getEntry(uint16_t Section, uint32_t Entry) const; + template + const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const; + const Elf_Shdr *getSection(DataRefImpl index) const; + const Elf_Shdr *getSection(uint32_t index) const; + const Elf_Rel *getRel(DataRefImpl Rel) const; + const Elf_Rela *getRela(DataRefImpl Rela) const; + const char *getString(uint32_t section, uint32_t offset) const; + const char *getString(const Elf_Shdr *section, uint32_t offset) const; + error_code getSymbolName(const Elf_Shdr *section, + const Elf_Sym *Symb, + StringRef &Res) const; + error_code getSymbolVersion(const Elf_Shdr *section, + const Elf_Sym *Symb, + StringRef &Version, + bool &IsDefault) const; + void VerifyStrTab(const Elf_Shdr *sh) const; + +protected: + const Elf_Sym *getSymbol(DataRefImpl Symb) const; // FIXME: Should be private? + void validateSymbol(DataRefImpl Symb) const; + +public: + const Elf_Dyn *getDyn(DataRefImpl DynData) const; + error_code getSymbolVersion(SymbolRef Symb, StringRef &Version, + bool &IsDefault) const; +protected: + virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; + virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; + virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; + virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const; + virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const; + virtual error_code getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const; + + friend class DynRefImpl; + virtual error_code getDynNext(DataRefImpl DynData, DynRef &Result) const; + + virtual error_code getLibraryNext(DataRefImpl Data, LibraryRef &Result) const; + virtual error_code getLibraryPath(DataRefImpl Data, StringRef &Res) const; + + virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; + virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; + virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const; + virtual error_code getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const; + virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionRequiredForExecution(DataRefImpl Sec, + bool &Res) const; + virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const; + virtual 
error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, + bool &Result) const; + virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const; + virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const; + + virtual error_code getRelocationNext(DataRefImpl Rel, + RelocationRef &Res) const; + virtual error_code getRelocationAddress(DataRefImpl Rel, + uint64_t &Res) const; + virtual error_code getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const; + virtual error_code getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Res) const; + virtual error_code getRelocationType(DataRefImpl Rel, + uint64_t &Res) const; + virtual error_code getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl &Result) const; + virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel, + int64_t &Res) const; + virtual error_code getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl &Result) const; + +public: + ELFObjectFile(MemoryBuffer *Object, error_code &ec); + virtual symbol_iterator begin_symbols() const; + virtual symbol_iterator end_symbols() const; + + virtual symbol_iterator begin_dynamic_symbols() const; + virtual symbol_iterator end_dynamic_symbols() const; + + virtual section_iterator begin_sections() const; + virtual section_iterator end_sections() const; + + virtual library_iterator begin_libraries_needed() const; + virtual library_iterator end_libraries_needed() const; + + virtual dyn_iterator begin_dynamic_table() const; + virtual dyn_iterator end_dynamic_table() const; + + virtual uint8_t getBytesInAddress() const; + virtual StringRef getFileFormatName() const; + virtual StringRef getObjectType() const { return "ELF"; } + virtual unsigned getArch() const; + virtual StringRef getLoadName() const; + + uint64_t getNumSections() const; + uint64_t getStringTableIndex() const; + ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; + const Elf_Shdr *getSection(const Elf_Sym *symb) const; + + // Methods for type inquiry through isa, cast, and dyn_cast + bool isDyldType() const { return isDyldELFObject; } + static inline bool classof(const Binary *v) { + return v->getType() == getELFType(target_endianness == support::little, + is64Bits); + } + static inline bool classof(const ELFObjectFile *v) { return true; } +}; + +// Iterate through the version definitions, and place each Elf_Verdef +// in the VersionMap according to its index. +template +void ELFObjectFile:: + LoadVersionDefs(const Elf_Shdr *sec) const { + unsigned vd_size = sec->sh_size; // Size of section in bytes + unsigned vd_count = sec->sh_info; // Number of Verdef entries + const char *sec_start = (const char*)base() + sec->sh_offset; + const char *sec_end = sec_start + vd_size; + // The first Verdef entry is at the start of the section. + const char *p = sec_start; + for (unsigned i = 0; i < vd_count; i++) { + if (p + sizeof(Elf_Verdef) > sec_end) + report_fatal_error("Section ended unexpectedly while scanning " + "version definitions."); + const Elf_Verdef *vd = reinterpret_cast(p); + if (vd->vd_version != ELF::VER_DEF_CURRENT) + report_fatal_error("Unexpected verdef version"); + size_t index = vd->vd_ndx & ELF::VERSYM_VERSION; + if (index >= VersionMap.size()) + VersionMap.resize(index+1); + VersionMap[index] = VersionMapEntry(vd); + p += vd->vd_next; + } +} + +// Iterate through the versions needed section, and place each Elf_Vernaux +// in the VersionMap according to its index. 
+template +void ELFObjectFile:: + LoadVersionNeeds(const Elf_Shdr *sec) const { + unsigned vn_size = sec->sh_size; // Size of section in bytes + unsigned vn_count = sec->sh_info; // Number of Verneed entries + const char *sec_start = (const char*)base() + sec->sh_offset; + const char *sec_end = sec_start + vn_size; + // The first Verneed entry is at the start of the section. + const char *p = sec_start; + for (unsigned i = 0; i < vn_count; i++) { + if (p + sizeof(Elf_Verneed) > sec_end) + report_fatal_error("Section ended unexpectedly while scanning " + "version needed records."); + const Elf_Verneed *vn = reinterpret_cast(p); + if (vn->vn_version != ELF::VER_NEED_CURRENT) + report_fatal_error("Unexpected verneed version"); + // Iterate through the Vernaux entries + const char *paux = p + vn->vn_aux; + for (unsigned j = 0; j < vn->vn_cnt; j++) { + if (paux + sizeof(Elf_Vernaux) > sec_end) + report_fatal_error("Section ended unexpectedly while scanning auxiliary " + "version needed records."); + const Elf_Vernaux *vna = reinterpret_cast(paux); + size_t index = vna->vna_other & ELF::VERSYM_VERSION; + if (index >= VersionMap.size()) + VersionMap.resize(index+1); + VersionMap[index] = VersionMapEntry(vna); + paux += vna->vna_next; + } + p += vn->vn_next; + } +} + +template +void ELFObjectFile::LoadVersionMap() const { + // If there is no dynamic symtab or version table, there is nothing to do. + if (SymbolTableSections[0] == NULL || dot_gnu_version_sec == NULL) + return; + + // Has the VersionMap already been loaded? + if (VersionMap.size() > 0) + return; + + // The first two version indexes are reserved. + // Index 0 is LOCAL, index 1 is GLOBAL. + VersionMap.push_back(VersionMapEntry()); + VersionMap.push_back(VersionMapEntry()); + + if (dot_gnu_version_d_sec) + LoadVersionDefs(dot_gnu_version_d_sec); + + if (dot_gnu_version_r_sec) + LoadVersionNeeds(dot_gnu_version_r_sec); +} + +template +void ELFObjectFile + ::validateSymbol(DataRefImpl Symb) const { + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; + // FIXME: We really need to do proper error handling in the case of an invalid + // input file. Because we don't use exceptions, I think we'll just pass + // an error object around. + if (!( symb + && SymbolTableSection + && symb >= (const Elf_Sym*)(base() + + SymbolTableSection->sh_offset) + && symb < (const Elf_Sym*)(base() + + SymbolTableSection->sh_offset + + SymbolTableSection->sh_size))) + // FIXME: Proper error handling. + report_fatal_error("Symb must point to a valid symbol!"); +} + +template +error_code ELFObjectFile + ::getSymbolNext(DataRefImpl Symb, + SymbolRef &Result) const { + validateSymbol(Symb); + const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; + + ++Symb.d.a; + // Check to see if we are at the end of this symbol table. + if (Symb.d.a >= SymbolTableSection->getEntityCount()) { + // We are at the end. If there are other symbol tables, jump to them. + // If the symbol table is .dynsym, we are iterating dynamic symbols, + // and there is only one table of these. + if (Symb.d.b != 0) { + ++Symb.d.b; + Symb.d.a = 1; // The 0th symbol in ELF is fake. + } + // Otherwise return the terminator.
+ if (Symb.d.b == 0 || Symb.d.b >= SymbolTableSections.size()) { + Symb.d.a = std::numeric_limits::max(); + Symb.d.b = std::numeric_limits::max(); + } + } + + Result = SymbolRef(Symb, this); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSymbolName(DataRefImpl Symb, + StringRef &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + return getSymbolName(SymbolTableSections[Symb.d.b], symb, Result); +} + +template +error_code ELFObjectFile + ::getSymbolVersion(SymbolRef SymRef, + StringRef &Version, + bool &IsDefault) const { + DataRefImpl Symb = SymRef.getRawDataRefImpl(); + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + return getSymbolVersion(SymbolTableSections[Symb.d.b], symb, + Version, IsDefault); +} + +template +ELF::Elf64_Word ELFObjectFile + ::getSymbolTableIndex(const Elf_Sym *symb) const { + if (symb->st_shndx == ELF::SHN_XINDEX) + return ExtendedSymbolTable.lookup(symb); + return symb->st_shndx; +} + +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile + ::getSection(const Elf_Sym *symb) const { + if (symb->st_shndx == ELF::SHN_XINDEX) + return getSection(ExtendedSymbolTable.lookup(symb)); + if (symb->st_shndx >= ELF::SHN_LORESERVE) + return 0; + return getSection(symb->st_shndx); +} + +template +error_code ELFObjectFile + ::getSymbolFileOffset(DataRefImpl Symb, + uint64_t &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *Section; + switch (getSymbolTableIndex(symb)) { + case ELF::SHN_COMMON: + // Uninitialized symbols have no offset in the object file + case ELF::SHN_UNDEF: + Result = UnknownAddressOrSize; + return object_error::success; + case ELF::SHN_ABS: + Result = symb->st_value; + return object_error::success; + default: Section = getSection(symb); + } + + switch (symb->getType()) { + case ELF::STT_SECTION: + Result = Section ? Section->sh_addr : UnknownAddressOrSize; + return object_error::success; + case ELF::STT_FUNC: + case ELF::STT_OBJECT: + case ELF::STT_NOTYPE: + Result = symb->st_value + + (Section ? Section->sh_offset : 0); + return object_error::success; + default: + Result = UnknownAddressOrSize; + return object_error::success; + } +} + +template +error_code ELFObjectFile + ::getSymbolAddress(DataRefImpl Symb, + uint64_t &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *Section; + switch (getSymbolTableIndex(symb)) { + case ELF::SHN_COMMON: + case ELF::SHN_UNDEF: + Result = UnknownAddressOrSize; + return object_error::success; + case ELF::SHN_ABS: + Result = symb->st_value; + return object_error::success; + default: Section = getSection(symb); + } + + switch (symb->getType()) { + case ELF::STT_SECTION: + Result = Section ? Section->sh_addr : UnknownAddressOrSize; + return object_error::success; + case ELF::STT_FUNC: + case ELF::STT_OBJECT: + case ELF::STT_NOTYPE: + Result = symb->st_value + (Section ?
Section->sh_addr : 0); + return object_error::success; + default: + Result = UnknownAddressOrSize; + return object_error::success; + } +} + +template +error_code ELFObjectFile + ::getSymbolSize(DataRefImpl Symb, + uint64_t &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + if (symb->st_size == 0) + Result = UnknownAddressOrSize; + Result = symb->st_size; + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSymbolNMTypeChar(DataRefImpl Symb, + char &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *Section = getSection(symb); + + char ret = '?'; + + if (Section) { + switch (Section->sh_type) { + case ELF::SHT_PROGBITS: + case ELF::SHT_DYNAMIC: + switch (Section->sh_flags) { + case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR): + ret = 't'; break; + case (ELF::SHF_ALLOC | ELF::SHF_WRITE): + ret = 'd'; break; + case ELF::SHF_ALLOC: + case (ELF::SHF_ALLOC | ELF::SHF_MERGE): + case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS): + ret = 'r'; break; + } + break; + case ELF::SHT_NOBITS: ret = 'b'; + } + } + + switch (getSymbolTableIndex(symb)) { + case ELF::SHN_UNDEF: + if (ret == '?') + ret = 'U'; + break; + case ELF::SHN_ABS: ret = 'a'; break; + case ELF::SHN_COMMON: ret = 'c'; break; + } + + switch (symb->getBinding()) { + case ELF::STB_GLOBAL: ret = ::toupper(ret); break; + case ELF::STB_WEAK: + if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) + ret = 'w'; + else + if (symb->getType() == ELF::STT_OBJECT) + ret = 'V'; + else + ret = 'W'; + } + + if (ret == '?' && symb->getType() == ELF::STT_SECTION) { + StringRef name; + if (error_code ec = getSymbolName(Symb, name)) + return ec; + Result = StringSwitch(name) + .StartsWith(".debug", 'N') + .StartsWith(".note", 'n') + .Default('?'); + return object_error::success; + } + + Result = ret; + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSymbolType(DataRefImpl Symb, + SymbolRef::Type &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + + switch (symb->getType()) { + case ELF::STT_NOTYPE: + Result = SymbolRef::ST_Unknown; + break; + case ELF::STT_SECTION: + Result = SymbolRef::ST_Debug; + break; + case ELF::STT_FILE: + Result = SymbolRef::ST_File; + break; + case ELF::STT_FUNC: + Result = SymbolRef::ST_Function; + break; + case ELF::STT_OBJECT: + case ELF::STT_COMMON: + case ELF::STT_TLS: + Result = SymbolRef::ST_Data; + break; + default: + Result = SymbolRef::ST_Other; + break; + } + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSymbolFlags(DataRefImpl Symb, + uint32_t &Result) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + + Result = SymbolRef::SF_None; + + if (symb->getBinding() != ELF::STB_LOCAL) + Result |= SymbolRef::SF_Global; + + if (symb->getBinding() == ELF::STB_WEAK) + Result |= SymbolRef::SF_Weak; + + if (symb->st_shndx == ELF::SHN_ABS) + Result |= SymbolRef::SF_Absolute; + + if (symb->getType() == ELF::STT_FILE || + symb->getType() == ELF::STT_SECTION) + Result |= SymbolRef::SF_FormatSpecific; + + if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) + Result |= SymbolRef::SF_Undefined; + + if (symb->getType() == ELF::STT_COMMON || + getSymbolTableIndex(symb) == ELF::SHN_COMMON) + Result |= SymbolRef::SF_Common; + + if (symb->getType() == ELF::STT_TLS) + Result |= SymbolRef::SF_ThreadLocal; + + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSymbolSection(DataRefImpl Symb, + 
section_iterator &Res) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *sec = getSection(symb); + if (!sec) + Res = end_sections(); + else { + DataRefImpl Sec; + Sec.p = reinterpret_cast(sec); + Res = section_iterator(SectionRef(Sec, this)); + } + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const { + const uint8_t *sec = reinterpret_cast(Sec.p); + sec += Header->e_shentsize; + Sec.p = reinterpret_cast(sec); + Result = SectionRef(Sec, this); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSectionName(DataRefImpl Sec, + StringRef &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name)); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSectionAddress(DataRefImpl Sec, + uint64_t &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + Result = sec->sh_addr; + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSectionSize(DataRefImpl Sec, + uint64_t &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + Result = sec->sh_size; + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSectionContents(DataRefImpl Sec, + StringRef &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + const char *start = (const char*)base() + sec->sh_offset; + Result = StringRef(start, sec->sh_size); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSectionAlignment(DataRefImpl Sec, + uint64_t &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + Result = sec->sh_addralign; + return object_error::success; +} + +template +error_code ELFObjectFile + ::isSectionText(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + if (sec->sh_flags & ELF::SHF_EXECINSTR) + Result = true; + else + Result = false; + return object_error::success; +} + +template +error_code ELFObjectFile + ::isSectionData(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) + && sec->sh_type == ELF::SHT_PROGBITS) + Result = true; + else + Result = false; + return object_error::success; +} + +template +error_code ELFObjectFile + ::isSectionBSS(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) + && sec->sh_type == ELF::SHT_NOBITS) + Result = true; + else + Result = false; + return object_error::success; +} + +template +error_code ELFObjectFile + ::isSectionRequiredForExecution(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + if (sec->sh_flags & ELF::SHF_ALLOC) + Result = true; + else + Result = false; + return object_error::success; +} + +template +error_code ELFObjectFile + ::isSectionVirtual(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + if (sec->sh_type == ELF::SHT_NOBITS) + Result = true; + else + Result = false; + return object_error::success; +} + +template +error_code ELFObjectFile::isSectionZeroInit(DataRefImpl Sec, + bool &Result) const { + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + // For ELF, all zero-init sections are virtual (that is, they occupy no space + // in the object image) and vice versa. 
+ if (sec->sh_type == ELF::SHT_NOBITS) + Result = true; + else + Result = false; + return object_error::success; +} + +template +error_code ELFObjectFile + ::sectionContainsSymbol(DataRefImpl Sec, + DataRefImpl Symb, + bool &Result) const { + // FIXME: Unimplemented. + Result = false; + return object_error::success; +} + +template +relocation_iterator ELFObjectFile + ::getSectionRelBegin(DataRefImpl Sec) const { + DataRefImpl RelData; + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec); + if (sec != 0 && ittr != SectionRelocMap.end()) { + RelData.w.a = getSection(ittr->second[0])->sh_info; + RelData.w.b = ittr->second[0]; + RelData.w.c = 0; + } + return relocation_iterator(RelocationRef(RelData, this)); +} + +template +relocation_iterator ELFObjectFile + ::getSectionRelEnd(DataRefImpl Sec) const { + DataRefImpl RelData; + const Elf_Shdr *sec = reinterpret_cast(Sec.p); + typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec); + if (sec != 0 && ittr != SectionRelocMap.end()) { + // Get the index of the last relocation section for this section. + std::size_t relocsecindex = ittr->second[ittr->second.size() - 1]; + const Elf_Shdr *relocsec = getSection(relocsecindex); + RelData.w.a = relocsec->sh_info; + RelData.w.b = relocsecindex; + RelData.w.c = relocsec->sh_size / relocsec->sh_entsize; + } + return relocation_iterator(RelocationRef(RelData, this)); +} + +// Relocations +template +error_code ELFObjectFile + ::getRelocationNext(DataRefImpl Rel, + RelocationRef &Result) const { + ++Rel.w.c; + const Elf_Shdr *relocsec = getSection(Rel.w.b); + if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) { + // We have reached the end of the relocations for this section. See if there + // is another relocation section. + typename RelocMap_t::mapped_type relocseclist = + SectionRelocMap.lookup(getSection(Rel.w.a)); + + // Do a binary search for the current reloc section index (which must be + // present). Then get the next one. + typename RelocMap_t::mapped_type::const_iterator loc = + std::lower_bound(relocseclist.begin(), relocseclist.end(), Rel.w.b); + ++loc; + + // If there is no next one, don't do anything. The ++Rel.w.c above sets Rel + // to the end iterator.
+ if (loc != relocseclist.end()) { + Rel.w.b = *loc; + Rel.w.a = 0; + } + } + Result = RelocationRef(Rel, this); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getRelocationSymbol(DataRefImpl Rel, + SymbolRef &Result) const { + uint32_t symbolIdx; + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + symbolIdx = getRel(Rel)->getSymbol(); + break; + } + case ELF::SHT_RELA : { + symbolIdx = getRela(Rel)->getSymbol(); + break; + } + } + DataRefImpl SymbolData; + IndexMap_t::const_iterator it = SymbolTableSectionsIndexMap.find(sec->sh_link); + if (it == SymbolTableSectionsIndexMap.end()) + report_fatal_error("Relocation symbol table not found!"); + SymbolData.d.a = symbolIdx; + SymbolData.d.b = it->second; + Result = SymbolRef(SymbolData, this); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getRelocationAddress(DataRefImpl Rel, + uint64_t &Result) const { + uint64_t offset; + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + offset = getRel(Rel)->r_offset; + break; + } + case ELF::SHT_RELA : { + offset = getRela(Rel)->r_offset; + break; + } + } + + Result = offset; + return object_error::success; +} + +template +error_code ELFObjectFile + ::getRelocationOffset(DataRefImpl Rel, + uint64_t &Result) const { + uint64_t offset; + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + offset = getRel(Rel)->r_offset; + break; + } + case ELF::SHT_RELA : { + offset = getRela(Rel)->r_offset; + break; + } + } + + Result = offset - sec->sh_addr; + return object_error::success; +} + +template +error_code ELFObjectFile + ::getRelocationType(DataRefImpl Rel, + uint64_t &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + Result = getRel(Rel)->getType(); + break; + } + case ELF::SHT_RELA : { + Result = getRela(Rel)->getType(); + break; + } + } + return object_error::success; +} + +#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \ + case ELF::enum: res = #enum; break; + +template +error_code ELFObjectFile + ::getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + uint8_t type; + StringRef res; + switch (sec->sh_type) { + default : + return object_error::parse_failed; + case ELF::SHT_REL : { + type = getRel(Rel)->getType(); + break; + } + case ELF::SHT_RELA : { + type = getRela(Rel)->getType(); + break; + } + } + switch (Header->e_machine) { + case ELF::EM_X86_64: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC); + default: + res = "Unknown"; + } + break; + case ELF::EM_386: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE); + default: + res = "Unknown"; + } + break; + default: + res = "Unknown"; + } + Result.append(res.begin(), res.end()); + return object_error::success; +} + +#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME + +template +error_code ELFObjectFile + ::getRelocationAdditionalInfo(DataRefImpl Rel, + int64_t &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in 
Rel!"); + case ELF::SHT_REL : { + Result = 0; + return object_error::success; + } + case ELF::SHT_RELA : { + Result = getRela(Rel)->r_addend; + return object_error::success; + } + } +} + +template +error_code ELFObjectFile + ::getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl &Result) const { + const Elf_Shdr *sec = getSection(Rel.w.b); + uint8_t type; + StringRef res; + int64_t addend = 0; + uint16_t symbol_index = 0; + switch (sec->sh_type) { + default : + return object_error::parse_failed; + case ELF::SHT_REL : { + type = getRel(Rel)->getType(); + symbol_index = getRel(Rel)->getSymbol(); + // TODO: Read implicit addend from section data. + break; + } + case ELF::SHT_RELA : { + type = getRela(Rel)->getType(); + symbol_index = getRela(Rel)->getSymbol(); + addend = getRela(Rel)->r_addend; + break; + } + } + const Elf_Sym *symb = getEntry(sec->sh_link, symbol_index); + StringRef symname; + if (error_code ec = getSymbolName(getSection(sec->sh_link), symb, symname)) + return ec; + switch (Header->e_machine) { + case ELF::EM_X86_64: + switch (type) { + case ELF::R_X86_64_32S: + res = symname; + break; + case ELF::R_X86_64_PC32: { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + fmt << symname << (addend < 0 ? "" : "+") << addend << "-P"; + fmt.flush(); + Result.append(fmtbuf.begin(), fmtbuf.end()); + } + break; + default: + res = "Unknown"; + } + break; + default: + res = "Unknown"; + } + if (Result.empty()) + Result.append(res.begin(), res.end()); + return object_error::success; +} + +// Verify that the last byte in the string table in a null. +template +void ELFObjectFile + ::VerifyStrTab(const Elf_Shdr *sh) const { + const char *strtab = (const char*)base() + sh->sh_offset; + if (strtab[sh->sh_size - 1] != 0) + // FIXME: Proper error handling. + report_fatal_error("String table must end with a null terminator!"); +} + +template +ELFObjectFile::ELFObjectFile(MemoryBuffer *Object + , error_code &ec) + : ObjectFile(getELFType(target_endianness == support::little, is64Bits), + Object, ec) + , isDyldELFObject(false) + , SectionHeaderTable(0) + , dot_shstrtab_sec(0) + , dot_strtab_sec(0) + , dot_dynstr_sec(0) + , dot_dynamic_sec(0) + , dot_gnu_version_sec(0) + , dot_gnu_version_r_sec(0) + , dot_gnu_version_d_sec(0) + , dt_soname(0) + { + + const uint64_t FileSize = Data->getBufferSize(); + + if (sizeof(Elf_Ehdr) > FileSize) + // FIXME: Proper error handling. + report_fatal_error("File too short!"); + + Header = reinterpret_cast(base()); + + if (Header->e_shoff == 0) + return; + + const uint64_t SectionTableOffset = Header->e_shoff; + + if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize) + // FIXME: Proper error handling. + report_fatal_error("Section header table goes past end of file!"); + + // The getNumSections() call below depends on SectionHeaderTable being set. + SectionHeaderTable = + reinterpret_cast(base() + SectionTableOffset); + const uint64_t SectionTableSize = getNumSections() * Header->e_shentsize; + + if (SectionTableOffset + SectionTableSize > FileSize) + // FIXME: Proper error handling. + report_fatal_error("Section table goes past end of file!"); + + // To find the symbol tables we walk the section table to find SHT_SYMTAB. 
+ const Elf_Shdr* SymbolTableSectionHeaderIndex = 0; + const Elf_Shdr* sh = SectionHeaderTable; + + // Reserve SymbolTableSections[0] for .dynsym + SymbolTableSections.push_back(NULL); + + for (uint64_t i = 0, e = getNumSections(); i != e; ++i) { + switch (sh->sh_type) { + case ELF::SHT_SYMTAB_SHNDX: { + if (SymbolTableSectionHeaderIndex) + // FIXME: Proper error handling. + report_fatal_error("More than one .symtab_shndx!"); + SymbolTableSectionHeaderIndex = sh; + break; + } + case ELF::SHT_SYMTAB: { + SymbolTableSectionsIndexMap[i] = SymbolTableSections.size(); + SymbolTableSections.push_back(sh); + break; + } + case ELF::SHT_DYNSYM: { + if (SymbolTableSections[0] != NULL) + // FIXME: Proper error handling. + report_fatal_error("More than one .dynsym!"); + SymbolTableSectionsIndexMap[i] = 0; + SymbolTableSections[0] = sh; + break; + } + case ELF::SHT_REL: + case ELF::SHT_RELA: { + SectionRelocMap[getSection(sh->sh_info)].push_back(i); + break; + } + case ELF::SHT_DYNAMIC: { + if (dot_dynamic_sec != NULL) + // FIXME: Proper error handling. + report_fatal_error("More than one .dynamic!"); + dot_dynamic_sec = sh; + break; + } + case ELF::SHT_GNU_versym: { + if (dot_gnu_version_sec != NULL) + // FIXME: Proper error handling. + report_fatal_error("More than one .gnu.version section!"); + dot_gnu_version_sec = sh; + break; + } + case ELF::SHT_GNU_verdef: { + if (dot_gnu_version_d_sec != NULL) + // FIXME: Proper error handling. + report_fatal_error("More than one .gnu.version_d section!"); + dot_gnu_version_d_sec = sh; + break; + } + case ELF::SHT_GNU_verneed: { + if (dot_gnu_version_r_sec != NULL) + // FIXME: Proper error handling. + report_fatal_error("More than one .gnu.version_r section!"); + dot_gnu_version_r_sec = sh; + break; + } + } + ++sh; + } + + // Sort section relocation lists by index. + for (typename RelocMap_t::iterator i = SectionRelocMap.begin(), + e = SectionRelocMap.end(); i != e; ++i) { + std::sort(i->second.begin(), i->second.end()); + } + + // Get string table sections. + dot_shstrtab_sec = getSection(getStringTableIndex()); + if (dot_shstrtab_sec) { + // Verify that the last byte in the string table in a null. + VerifyStrTab(dot_shstrtab_sec); + } + + // Merge this into the above loop. + for (const char *i = reinterpret_cast(SectionHeaderTable), + *e = i + getNumSections() * Header->e_shentsize; + i != e; i += Header->e_shentsize) { + const Elf_Shdr *sh = reinterpret_cast(i); + if (sh->sh_type == ELF::SHT_STRTAB) { + StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name)); + if (SectionName == ".strtab") { + if (dot_strtab_sec != 0) + // FIXME: Proper error handling. + report_fatal_error("Already found section named .strtab!"); + dot_strtab_sec = sh; + VerifyStrTab(dot_strtab_sec); + } else if (SectionName == ".dynstr") { + if (dot_dynstr_sec != 0) + // FIXME: Proper error handling. + report_fatal_error("Already found section named .dynstr!"); + dot_dynstr_sec = sh; + VerifyStrTab(dot_dynstr_sec); + } + } + } + + // Build symbol name side-mapping if there is one. 
+ if (SymbolTableSectionHeaderIndex) { + const Elf_Word *ShndxTable = reinterpret_cast(base() + + SymbolTableSectionHeaderIndex->sh_offset); + error_code ec; + for (symbol_iterator si = begin_symbols(), + se = end_symbols(); si != se; si.increment(ec)) { + if (ec) + report_fatal_error("Fewer extended symbol table entries than symbols!"); + if (*ShndxTable != ELF::SHN_UNDEF) + ExtendedSymbolTable[getSymbol(si->getRawDataRefImpl())] = *ShndxTable; + ++ShndxTable; + } + } +} + +template +symbol_iterator ELFObjectFile + ::begin_symbols() const { + DataRefImpl SymbolData; + if (SymbolTableSections.size() <= 1) { + SymbolData.d.a = std::numeric_limits::max(); + SymbolData.d.b = std::numeric_limits::max(); + } else { + SymbolData.d.a = 1; // The 0th symbol in ELF is fake. + SymbolData.d.b = 1; // The 0th table is .dynsym + } + return symbol_iterator(SymbolRef(SymbolData, this)); +} + +template +symbol_iterator ELFObjectFile + ::end_symbols() const { + DataRefImpl SymbolData; + SymbolData.d.a = std::numeric_limits::max(); + SymbolData.d.b = std::numeric_limits::max(); + return symbol_iterator(SymbolRef(SymbolData, this)); +} + +template +symbol_iterator ELFObjectFile + ::begin_dynamic_symbols() const { + DataRefImpl SymbolData; + if (SymbolTableSections[0] == NULL) { + SymbolData.d.a = std::numeric_limits::max(); + SymbolData.d.b = std::numeric_limits::max(); + } else { + SymbolData.d.a = 1; // The 0th symbol in ELF is fake. + SymbolData.d.b = 0; // The 0th table is .dynsym + } + return symbol_iterator(SymbolRef(SymbolData, this)); +} + +template +symbol_iterator ELFObjectFile + ::end_dynamic_symbols() const { + DataRefImpl SymbolData; + SymbolData.d.a = std::numeric_limits::max(); + SymbolData.d.b = std::numeric_limits::max(); + return symbol_iterator(SymbolRef(SymbolData, this)); +} + +template +section_iterator ELFObjectFile + ::begin_sections() const { + DataRefImpl ret; + ret.p = reinterpret_cast(base() + Header->e_shoff); + return section_iterator(SectionRef(ret, this)); +} + +template +section_iterator ELFObjectFile + ::end_sections() const { + DataRefImpl ret; + ret.p = reinterpret_cast(base() + + Header->e_shoff + + (Header->e_shentsize*getNumSections())); + return section_iterator(SectionRef(ret, this)); +} + +template +typename ELFObjectFile::dyn_iterator +ELFObjectFile::begin_dynamic_table() const { + DataRefImpl DynData; + if (dot_dynamic_sec == NULL || dot_dynamic_sec->sh_size == 0) { + DynData.d.a = std::numeric_limits::max(); + } else { + DynData.d.a = 0; + } + return dyn_iterator(DynRef(DynData, this)); +} + +template +typename ELFObjectFile::dyn_iterator +ELFObjectFile + ::end_dynamic_table() const { + DataRefImpl DynData; + DynData.d.a = std::numeric_limits::max(); + return dyn_iterator(DynRef(DynData, this)); +} + +template +error_code ELFObjectFile + ::getDynNext(DataRefImpl DynData, + DynRef &Result) const { + ++DynData.d.a; + + // Check to see if we are at the end of .dynamic + if (DynData.d.a >= dot_dynamic_sec->getEntityCount()) { + // We are at the end. Return the terminator. 
+ DynData.d.a = std::numeric_limits::max(); + } + + Result = DynRef(DynData, this); + return object_error::success; +} + +template +StringRef +ELFObjectFile::getLoadName() const { + if (!dt_soname) { + // Find the DT_SONAME entry + dyn_iterator it = begin_dynamic_table(); + dyn_iterator ie = end_dynamic_table(); + error_code ec; + while (it != ie) { + if (it->getTag() == ELF::DT_SONAME) + break; + it.increment(ec); + if (ec) + report_fatal_error("dynamic table iteration failed"); + } + if (it != ie) { + if (dot_dynstr_sec == NULL) + report_fatal_error("Dynamic string table is missing"); + dt_soname = getString(dot_dynstr_sec, it->getVal()); + } else { + dt_soname = ""; + } + } + return dt_soname; +} + +template +library_iterator ELFObjectFile + ::begin_libraries_needed() const { + // Find the first DT_NEEDED entry + dyn_iterator i = begin_dynamic_table(); + dyn_iterator e = end_dynamic_table(); + error_code ec; + while (i != e) { + if (i->getTag() == ELF::DT_NEEDED) + break; + i.increment(ec); + if (ec) + report_fatal_error("dynamic table iteration failed"); + } + // Use the same DataRefImpl format as DynRef. + return library_iterator(LibraryRef(i->getRawDataRefImpl(), this)); +} + +template +error_code ELFObjectFile + ::getLibraryNext(DataRefImpl Data, + LibraryRef &Result) const { + // Use the same DataRefImpl format as DynRef. + dyn_iterator i = dyn_iterator(DynRef(Data, this)); + dyn_iterator e = end_dynamic_table(); + + // Skip the current dynamic table entry. + error_code ec; + if (i != e) { + i.increment(ec); + // TODO: proper error handling + if (ec) + report_fatal_error("dynamic table iteration failed"); + } + + // Find the next DT_NEEDED entry. + while (i != e) { + if (i->getTag() == ELF::DT_NEEDED) + break; + i.increment(ec); + if (ec) + report_fatal_error("dynamic table iteration failed"); + } + Result = LibraryRef(i->getRawDataRefImpl(), this); + return object_error::success; +} + +template +error_code ELFObjectFile + ::getLibraryPath(DataRefImpl Data, StringRef &Res) const { + dyn_iterator i = dyn_iterator(DynRef(Data, this)); + if (i == end_dynamic_table()) + report_fatal_error("getLibraryPath() called on iterator end"); + + if (i->getTag() != ELF::DT_NEEDED) + report_fatal_error("Invalid library_iterator"); + + // This uses .dynstr to lookup the name of the DT_NEEDED entry. + // THis works as long as DT_STRTAB == .dynstr. This is true most of + // the time, but the specification allows exceptions. + // TODO: This should really use DT_STRTAB instead. Doing this requires + // reading the program headers. + if (dot_dynstr_sec == NULL) + report_fatal_error("Dynamic string table is missing"); + Res = getString(dot_dynstr_sec, i->getVal()); + return object_error::success; +} + +template +library_iterator ELFObjectFile + ::end_libraries_needed() const { + dyn_iterator e = end_dynamic_table(); + // Use the same DataRefImpl format as DynRef. + return library_iterator(LibraryRef(e->getRawDataRefImpl(), this)); +} + +template +uint8_t ELFObjectFile::getBytesInAddress() const { + return is64Bits ? 
8 : 4; +} + +template +StringRef ELFObjectFile + ::getFileFormatName() const { + switch(Header->e_ident[ELF::EI_CLASS]) { + case ELF::ELFCLASS32: + switch(Header->e_machine) { + case ELF::EM_386: + return "ELF32-i386"; + case ELF::EM_X86_64: + return "ELF32-x86-64"; + case ELF::EM_ARM: + return "ELF32-arm"; + default: + return "ELF32-unknown"; + } + case ELF::ELFCLASS64: + switch(Header->e_machine) { + case ELF::EM_386: + return "ELF64-i386"; + case ELF::EM_X86_64: + return "ELF64-x86-64"; + default: + return "ELF64-unknown"; + } + default: + // FIXME: Proper error handling. + report_fatal_error("Invalid ELFCLASS!"); + } +} + +template +unsigned ELFObjectFile::getArch() const { + switch(Header->e_machine) { + case ELF::EM_386: + return Triple::x86; + case ELF::EM_X86_64: + return Triple::x86_64; + case ELF::EM_ARM: + return Triple::arm; + default: + return Triple::UnknownArch; + } +} + +template +uint64_t ELFObjectFile::getNumSections() const { + assert(Header && "Header not initialized!"); + if (Header->e_shnum == ELF::SHN_UNDEF) { + assert(SectionHeaderTable && "SectionHeaderTable not initialized!"); + return SectionHeaderTable->sh_size; + } + return Header->e_shnum; +} + +template +uint64_t +ELFObjectFile::getStringTableIndex() const { + if (Header->e_shnum == ELF::SHN_UNDEF) { + if (Header->e_shstrndx == ELF::SHN_HIRESERVE) + return SectionHeaderTable->sh_link; + if (Header->e_shstrndx >= getNumSections()) + return 0; + } + return Header->e_shstrndx; +} + + +template +template +inline const T * +ELFObjectFile::getEntry(uint16_t Section, + uint32_t Entry) const { + return getEntry(getSection(Section), Entry); +} + +template +template +inline const T * +ELFObjectFile::getEntry(const Elf_Shdr * Section, + uint32_t Entry) const { + return reinterpret_cast( + base() + + Section->sh_offset + + (Entry * Section->sh_entsize)); +} + +template +const typename ELFObjectFile::Elf_Sym * +ELFObjectFile::getSymbol(DataRefImpl Symb) const { + return getEntry(SymbolTableSections[Symb.d.b], Symb.d.a); +} + +template +const typename ELFObjectFile::Elf_Dyn * +ELFObjectFile::getDyn(DataRefImpl DynData) const { + return getEntry(dot_dynamic_sec, DynData.d.a); +} + +template +const typename ELFObjectFile::Elf_Rel * +ELFObjectFile::getRel(DataRefImpl Rel) const { + return getEntry(Rel.w.b, Rel.w.c); +} + +template +const typename ELFObjectFile::Elf_Rela * +ELFObjectFile::getRela(DataRefImpl Rela) const { + return getEntry(Rela.w.b, Rela.w.c); +} + +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile::getSection(DataRefImpl Symb) const { + const Elf_Shdr *sec = getSection(Symb.d.b); + if (sec->sh_type != ELF::SHT_SYMTAB || sec->sh_type != ELF::SHT_DYNSYM) + // FIXME: Proper error handling. + report_fatal_error("Invalid symbol table section!"); + return sec; +} + +template +const typename ELFObjectFile::Elf_Shdr * +ELFObjectFile::getSection(uint32_t index) const { + if (index == 0) + return 0; + if (!SectionHeaderTable || index >= getNumSections()) + // FIXME: Proper error handling. 
+ report_fatal_error("Invalid section index!"); + + return reinterpret_cast( + reinterpret_cast(SectionHeaderTable) + + (index * Header->e_shentsize)); +} + +template +const char *ELFObjectFile + ::getString(uint32_t section, + ELF::Elf32_Word offset) const { + return getString(getSection(section), offset); +} + +template +const char *ELFObjectFile + ::getString(const Elf_Shdr *section, + ELF::Elf32_Word offset) const { + assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!"); + if (offset >= section->sh_size) + // FIXME: Proper error handling. + report_fatal_error("Symbol name offset outside of string table!"); + return (const char *)base() + section->sh_offset + offset; +} + +template +error_code ELFObjectFile + ::getSymbolName(const Elf_Shdr *section, + const Elf_Sym *symb, + StringRef &Result) const { + if (symb->st_name == 0) { + const Elf_Shdr *section = getSection(symb); + if (!section) + Result = ""; + else + Result = getString(dot_shstrtab_sec, section->sh_name); + return object_error::success; + } + + if (section == SymbolTableSections[0]) { + // Symbol is in .dynsym, use .dynstr string table + Result = getString(dot_dynstr_sec, symb->st_name); + } else { + // Use the default symbol table name section. + Result = getString(dot_strtab_sec, symb->st_name); + } + return object_error::success; +} + +template +error_code ELFObjectFile + ::getSymbolVersion(const Elf_Shdr *section, + const Elf_Sym *symb, + StringRef &Version, + bool &IsDefault) const { + // Handle non-dynamic symbols. + if (section != SymbolTableSections[0]) { + // Non-dynamic symbols can have versions in their names + // A name of the form 'foo@V1' indicates version 'V1', non-default. + // A name of the form 'foo@@V2' indicates version 'V2', default version. + StringRef Name; + error_code ec = getSymbolName(section, symb, Name); + if (ec != object_error::success) + return ec; + size_t atpos = Name.find('@'); + if (atpos == StringRef::npos) { + Version = ""; + IsDefault = false; + return object_error::success; + } + ++atpos; + if (atpos < Name.size() && Name[atpos] == '@') { + IsDefault = true; + ++atpos; + } else { + IsDefault = false; + } + Version = Name.substr(atpos); + return object_error::success; + } + + // This is a dynamic symbol. Look in the GNU symbol version table. + if (dot_gnu_version_sec == NULL) { + // No version table. + Version = ""; + IsDefault = false; + return object_error::success; + } + + // Determine the position in the symbol table of this entry. + const char *sec_start = (const char*)base() + section->sh_offset; + size_t entry_index = ((const char*)symb - sec_start)/section->sh_entsize; + + // Get the corresponding version index entry + const Elf_Versym *vs = getEntry(dot_gnu_version_sec, entry_index); + size_t version_index = vs->vs_index & ELF::VERSYM_VERSION; + + // Special markers for unversioned symbols. + if (version_index == ELF::VER_NDX_LOCAL || + version_index == ELF::VER_NDX_GLOBAL) { + Version = ""; + IsDefault = false; + return object_error::success; + } + + // Lookup this symbol in the version table + LoadVersionMap(); + if (version_index >= VersionMap.size() || VersionMap[version_index].isNull()) + report_fatal_error("Symbol has version index without corresponding " + "define or reference entry"); + const VersionMapEntry &entry = VersionMap[version_index]; + + // Get the version name string + size_t name_offset; + if (entry.isVerdef()) { + // The first Verdaux entry holds the name. 
+ name_offset = entry.getVerdef()->getAux()->vda_name; + } else { + name_offset = entry.getVernaux()->vna_name; + } + Version = getString(dot_dynstr_sec, name_offset); + + // Set IsDefault + if (entry.isVerdef()) { + IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN); + } else { + IsDefault = false; + } + + return object_error::success; +} + +template +inline DynRefImpl + ::DynRefImpl(DataRefImpl DynP, const OwningType *Owner) + : DynPimpl(DynP) + , OwningObject(Owner) {} + +template +inline bool DynRefImpl + ::operator==(const DynRefImpl &Other) const { + return DynPimpl == Other.DynPimpl; +} + +template +inline bool DynRefImpl + ::operator <(const DynRefImpl &Other) const { + return DynPimpl < Other.DynPimpl; +} + +template +inline error_code DynRefImpl + ::getNext(DynRefImpl &Result) const { + return OwningObject->getDynNext(DynPimpl, Result); +} + +template +inline int64_t DynRefImpl + ::getTag() const { + return OwningObject->getDyn(DynPimpl)->d_tag; +} + +template +inline uint64_t DynRefImpl + ::getVal() const { + return OwningObject->getDyn(DynPimpl)->d_un.d_val; +} + +template +inline uint64_t DynRefImpl + ::getPtr() const { + return OwningObject->getDyn(DynPimpl)->d_un.d_ptr; +} + +template +inline DataRefImpl DynRefImpl + ::getRawDataRefImpl() const { + return DynPimpl; +} + +/// This is a generic interface for retrieving GNU symbol version +/// information from an ELFObjectFile. +static inline error_code GetELFSymbolVersion(const ObjectFile *Obj, + const SymbolRef &Sym, + StringRef &Version, + bool &IsDefault) { + // Little-endian 32-bit + if (const ELFObjectFile *ELFObj = + dyn_cast >(Obj)) + return ELFObj->getSymbolVersion(Sym, Version, IsDefault); + + // Big-endian 32-bit + if (const ELFObjectFile *ELFObj = + dyn_cast >(Obj)) + return ELFObj->getSymbolVersion(Sym, Version, IsDefault); + + // Little-endian 64-bit + if (const ELFObjectFile *ELFObj = + dyn_cast >(Obj)) + return ELFObj->getSymbolVersion(Sym, Version, IsDefault); + + // Big-endian 64-bit + if (const ELFObjectFile *ELFObj = + dyn_cast >(Obj)) + return ELFObj->getSymbolVersion(Sym, Version, IsDefault); + + llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF"); +} + +} +} + +#endif diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h index f5e7461a488a..0b73f9483164 100644 --- a/contrib/llvm/include/llvm/Object/MachO.h +++ b/contrib/llvm/include/llvm/Object/MachO.h @@ -18,6 +18,7 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Object/MachOObject.h" #include "llvm/Support/MachO.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" namespace llvm { @@ -31,23 +32,36 @@ class MachOObjectFile : public ObjectFile { virtual symbol_iterator begin_symbols() const; virtual symbol_iterator end_symbols() const; + virtual symbol_iterator begin_dynamic_symbols() const; + virtual symbol_iterator end_dynamic_symbols() const; + virtual library_iterator begin_libraries_needed() const; + virtual library_iterator end_libraries_needed() const; virtual section_iterator begin_sections() const; virtual section_iterator end_sections() const; virtual uint8_t getBytesInAddress() const; virtual StringRef getFileFormatName() const; virtual unsigned getArch() const; + virtual StringRef getLoadName() const; + + MachOObject *getObject() { return MachOObj; } + + static inline bool classof(const Binary *v) { + return v->isMachO(); + } + static inline bool classof(const MachOObjectFile *v) { return true; } protected: virtual error_code getSymbolNext(DataRefImpl Symb, 
SymbolRef &Res) const; virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; - virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; - virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const; - virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const; - virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const; + virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const; + virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const; + virtual error_code getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const; virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; @@ -58,6 +72,10 @@ class MachOObjectFile : public ObjectFile { virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const; virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const; virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionRequiredForExecution(DataRefImpl Sec, + bool &Res) const; + virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const; + virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const; virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S, bool &Result) const; virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const; @@ -67,16 +85,22 @@ class MachOObjectFile : public ObjectFile { RelocationRef &Res) const; virtual error_code getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const; + virtual error_code getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const; virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const; virtual error_code getRelocationType(DataRefImpl Rel, - uint32_t &Res) const; + uint64_t &Res) const; virtual error_code getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const; virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel, int64_t &Res) const; virtual error_code getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const; + virtual error_code getRelocationHidden(DataRefImpl Rel, bool &Result) const; + + virtual error_code getLibraryNext(DataRefImpl LibData, LibraryRef &Res) const; + virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const; private: MachOObject *MachOObj; @@ -97,6 +121,9 @@ class MachOObjectFile : public ObjectFile { void getRelocation(DataRefImpl Rel, InMemoryStruct &Res) const; std::size_t getSectionIndex(DataRefImpl Sec) const; + + void printRelocationTargetName(InMemoryStruct& RE, + raw_string_ostream &fmt) const; }; } diff --git a/contrib/llvm/include/llvm/Object/MachOObject.h b/contrib/llvm/include/llvm/Object/MachOObject.h index 51be847858a1..056040274319 100644 --- a/contrib/llvm/include/llvm/Object/MachOObject.h +++ b/contrib/llvm/include/llvm/Object/MachOObject.h @@ -177,14 +177,14 @@ class MachOObject { void ReadULEB128s(uint64_t Index, SmallVectorImpl &Out) const; /// @} - + /// @name Object Dump Facilities /// @{ /// dump - Support for debugging, callable in GDB: V->dump() // void dump() const; void dumpHeader() const; - + /// print 
- Implement operator<< on Value. /// void print(raw_ostream &O) const; @@ -192,7 +192,7 @@ class MachOObject { /// @} }; - + inline raw_ostream &operator<<(raw_ostream &OS, const MachOObject &V) { V.print(OS); return OS; diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h index 83854a0d6c28..4dd7fb581308 100644 --- a/contrib/llvm/include/llvm/Object/ObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ObjectFile.h @@ -20,6 +20,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include +#include namespace llvm { namespace object { @@ -37,6 +38,9 @@ union DataRefImpl { uint32_t a, b; } d; uintptr_t p; + DataRefImpl() { + std::memset(this, 0, sizeof(DataRefImpl)); + } }; template @@ -78,52 +82,13 @@ static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) { return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0; } -/// SymbolRef - This is a value type class that represents a single symbol in -/// the list of symbols in the object file. -class SymbolRef { - friend class SectionRef; - DataRefImpl SymbolPimpl; - const ObjectFile *OwningObject; +static bool operator <(const DataRefImpl &a, const DataRefImpl &b) { + // Check bitwise identical. This is the only legal way to compare a union w/o + // knowing which member is in use. + return std::memcmp(&a, &b, sizeof(DataRefImpl)) < 0; +} -public: - SymbolRef() : OwningObject(NULL) { - std::memset(&SymbolPimpl, 0, sizeof(SymbolPimpl)); - } - - enum SymbolType { - ST_Function, - ST_Data, - ST_External, // Defined in another object file - ST_Other - }; - - SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner); - - bool operator==(const SymbolRef &Other) const; - - error_code getNext(SymbolRef &Result) const; - - error_code getName(StringRef &Result) const; - error_code getAddress(uint64_t &Result) const; - error_code getOffset(uint64_t &Result) const; - error_code getSize(uint64_t &Result) const; - error_code getSymbolType(SymbolRef::SymbolType &Result) const; - - /// Returns the ascii char that should be displayed in a symbol table dump via - /// nm for this symbol. - error_code getNMTypeChar(char &Result) const; - - /// Returns true for symbols that are internal to the object file format such - /// as section symbols. - error_code isInternal(bool &Result) const; - - /// Returns true for symbols that can be used in another objects, - /// such as library functions - error_code isGlobal(bool &Result) const; - - DataRefImpl getRawDataRefImpl() const; -}; -typedef content_iterator symbol_iterator; +class SymbolRef; /// RelocationRef - This is a value type class that represents a single /// relocation in the list of relocations in the object file. 
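
The hunk above gives DataRefImpl a zero-initializing default constructor and adds a bitwise operator< beside the existing memcmp-based operator==. Zeroing every byte up front is what keeps those bitwise comparisons well-defined when a producer writes only one union member. A minimal standalone sketch of the same idiom follows; the names are illustrative, not LLVM's.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Same shape as the union above: two small fields overlaid with a pointer.
    union Ref {
      struct { uint32_t a, b; } d;
      uintptr_t p;
      Ref() { std::memset(this, 0, sizeof(Ref)); }  // zero padding and all members
    };

    // Bitwise comparison is only sound because every Ref starts fully zeroed.
    static bool equal(const Ref &x, const Ref &y) {
      return std::memcmp(&x, &y, sizeof(Ref)) == 0;
    }

    int main() {
      Ref x, y;
      x.d.a = 42;           // each producer writes just one member...
      y.d.a = 42;
      assert(equal(x, y));  // ...and the untouched bytes still compare equal
      return 0;
    }
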
@@ -132,9 +97,7 @@ class RelocationRef { const ObjectFile *OwningObject; public: - RelocationRef() : OwningObject(NULL) { - std::memset(&RelocationPimpl, 0, sizeof(RelocationPimpl)); - } + RelocationRef() : OwningObject(NULL) { } RelocationRef(DataRefImpl RelocationP, const ObjectFile *Owner); @@ -143,8 +106,14 @@ class RelocationRef { error_code getNext(RelocationRef &Result) const; error_code getAddress(uint64_t &Result) const; + error_code getOffset(uint64_t &Result) const; error_code getSymbol(SymbolRef &Result) const; - error_code getType(uint32_t &Result) const; + error_code getType(uint64_t &Result) const; + + /// @brief Indicates whether this relocation should hidden when listing + /// relocations, usually because it is the trailing part of a multipart + /// relocation that will be printed as part of the leading relocation. + error_code getHidden(bool &Result) const; /// @brief Get a string that represents the type of this relocation. /// @@ -168,13 +137,12 @@ class SectionRef { const ObjectFile *OwningObject; public: - SectionRef() : OwningObject(NULL) { - std::memset(&SectionPimpl, 0, sizeof(SectionPimpl)); - } + SectionRef() : OwningObject(NULL) { } SectionRef(DataRefImpl SectionP, const ObjectFile *Owner); bool operator==(const SectionRef &Other) const; + bool operator <(const SectionRef &Other) const; error_code getNext(SectionRef &Result) const; @@ -190,21 +158,112 @@ class SectionRef { error_code isText(bool &Result) const; error_code isData(bool &Result) const; error_code isBSS(bool &Result) const; + error_code isRequiredForExecution(bool &Result) const; + error_code isVirtual(bool &Result) const; + error_code isZeroInit(bool &Result) const; error_code containsSymbol(SymbolRef S, bool &Result) const; relocation_iterator begin_relocations() const; relocation_iterator end_relocations() const; + + DataRefImpl getRawDataRefImpl() const; }; typedef content_iterator section_iterator; +/// SymbolRef - This is a value type class that represents a single symbol in +/// the list of symbols in the object file. +class SymbolRef { + friend class SectionRef; + DataRefImpl SymbolPimpl; + const ObjectFile *OwningObject; + +public: + SymbolRef() : OwningObject(NULL) { } + + enum Type { + ST_Unknown, // Type not specified + ST_Data, + ST_Debug, + ST_File, + ST_Function, + ST_Other + }; + + enum Flags { + SF_None = 0, + SF_Undefined = 1U << 0, // Symbol is defined in another object file + SF_Global = 1U << 1, // Global symbol + SF_Weak = 1U << 2, // Weak symbol + SF_Absolute = 1U << 3, // Absolute symbol + SF_ThreadLocal = 1U << 4, // Thread local symbol + SF_Common = 1U << 5, // Symbol has common linkage + SF_FormatSpecific = 1U << 31 // Specific to the object file format + // (e.g. section symbols) + }; + + SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner); + + bool operator==(const SymbolRef &Other) const; + bool operator <(const SymbolRef &Other) const; + + error_code getNext(SymbolRef &Result) const; + + error_code getName(StringRef &Result) const; + error_code getAddress(uint64_t &Result) const; + error_code getFileOffset(uint64_t &Result) const; + error_code getSize(uint64_t &Result) const; + error_code getType(SymbolRef::Type &Result) const; + + /// Returns the ascii char that should be displayed in a symbol table dump via + /// nm for this symbol. 
+ error_code getNMTypeChar(char &Result) const; + + /// Get symbol flags (bitwise OR of SymbolRef::Flags) + error_code getFlags(uint32_t &Result) const; + + /// @brief Return true for common symbols such as uninitialized globals + error_code isCommon(bool &Result) const; + + /// @brief Get section this symbol is defined in reference to. Result is + /// end_sections() if it is undefined or is an absolute symbol. + error_code getSection(section_iterator &Result) const; + + DataRefImpl getRawDataRefImpl() const; +}; +typedef content_iterator symbol_iterator; + +/// LibraryRef - This is a value type class that represents a single library in +/// the list of libraries needed by a shared or dynamic object. +class LibraryRef { + friend class SectionRef; + DataRefImpl LibraryPimpl; + const ObjectFile *OwningObject; + +public: + LibraryRef() : OwningObject(NULL) { } + + LibraryRef(DataRefImpl LibraryP, const ObjectFile *Owner); + + bool operator==(const LibraryRef &Other) const; + bool operator <(const LibraryRef &Other) const; + + error_code getNext(LibraryRef &Result) const; + + // Get the path to this library, as stored in the object file. + error_code getPath(StringRef &Result) const; + + DataRefImpl getRawDataRefImpl() const; +}; +typedef content_iterator library_iterator; + const uint64_t UnknownAddressOrSize = ~0ULL; /// ObjectFile - This class is the base class for all object file types. /// Concrete instances of this object are created by createObjectFile, which /// figure out which type to create. class ObjectFile : public Binary { -private: + virtual void anchor(); ObjectFile(); // = delete ObjectFile(const ObjectFile &other); // = delete @@ -227,12 +286,15 @@ class ObjectFile : public Binary { virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const = 0; virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const = 0; virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const =0; - virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const =0; + virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const =0; virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const = 0; + virtual error_code getSymbolType(DataRefImpl Symb, + SymbolRef::Type &Res) const = 0; virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const = 0; - virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const = 0; - virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const = 0; - virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const = 0; + virtual error_code getSymbolFlags(DataRefImpl Symb, + uint32_t &Res) const = 0; + virtual error_code getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const = 0; // Same as above for SectionRef. friend class SectionRef; @@ -245,6 +307,11 @@ class ObjectFile : public Binary { virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const = 0; virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const = 0; virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const = 0; + virtual error_code isSectionRequiredForExecution(DataRefImpl Sec, + bool &Res) const = 0; + // A section is 'virtual' if its contents aren't present in the object image. 
+ virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const = 0; + virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const = 0; virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const = 0; virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const = 0; @@ -257,25 +324,42 @@ class ObjectFile : public Binary { RelocationRef &Res) const = 0; virtual error_code getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const =0; + virtual error_code getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const =0; virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const = 0; virtual error_code getRelocationType(DataRefImpl Rel, - uint32_t &Res) const = 0; + uint64_t &Res) const = 0; virtual error_code getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const = 0; virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel, int64_t &Res) const = 0; virtual error_code getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const = 0; + virtual error_code getRelocationHidden(DataRefImpl Rel, bool &Result) const { + Result = false; + return object_error::success; + } + + // Same for LibraryRef + friend class LibraryRef; + virtual error_code getLibraryNext(DataRefImpl Lib, LibraryRef &Res) const = 0; + virtual error_code getLibraryPath(DataRefImpl Lib, StringRef &Res) const = 0; public: virtual symbol_iterator begin_symbols() const = 0; virtual symbol_iterator end_symbols() const = 0; + virtual symbol_iterator begin_dynamic_symbols() const = 0; + virtual symbol_iterator end_dynamic_symbols() const = 0; + virtual section_iterator begin_sections() const = 0; virtual section_iterator end_sections() const = 0; + virtual library_iterator begin_libraries_needed() const = 0; + virtual library_iterator end_libraries_needed() const = 0; + /// @brief The number of bytes used to represent an address in this object /// file format. virtual uint8_t getBytesInAddress() const = 0; @@ -283,6 +367,11 @@ class ObjectFile : public Binary { virtual StringRef getFileFormatName() const = 0; virtual /* Triple::ArchType */ unsigned getArch() const = 0; + /// For shared objects, returns the name which this object should be + /// loaded from at runtime. This corresponds to DT_SONAME on ELF and + /// LC_ID_DYLIB (install name) on MachO. + virtual StringRef getLoadName() const = 0; + /// @returns Pointer to ObjectFile subclass to handle this type of object. /// @param ObjectPath The path to the object file. ObjectPath.isObject must /// return true. 
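
The begin_libraries_needed()/end_libraries_needed() iterators and getLoadName() declared above are driven through error_code-returning accessors, in the same style as the existing symbol and section iterators. A rough client-side sketch, assuming an ObjectFile was obtained elsewhere (for example via createObjectFile()) and with error handling abbreviated:

    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::object;

    // Print the load (SONAME/install) name and each needed-library entry.
    static void dumpNeededLibraries(const ObjectFile *Obj) {
      outs() << "load name: " << Obj->getLoadName() << "\n";

      error_code ec;
      for (library_iterator I = Obj->begin_libraries_needed(),
                            E = Obj->end_libraries_needed();
           I != E; I.increment(ec)) {
        if (ec)
          break;                       // iteration failed; stop here
        StringRef Path;
        if (I->getPath(Path))          // a set error_code means the entry is bad
          continue;
        outs() << "  needed: " << Path << "\n";
      }
    }
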
@@ -291,8 +380,7 @@ class ObjectFile : public Binary { static ObjectFile *createObjectFile(MemoryBuffer *Object); static inline bool classof(const Binary *v) { - return v->getType() >= isObject && - v->getType() < lastObject; + return v->isObject(); } static inline bool classof(const ObjectFile *v) { return true; } @@ -311,6 +399,10 @@ inline bool SymbolRef::operator==(const SymbolRef &Other) const { return SymbolPimpl == Other.SymbolPimpl; } +inline bool SymbolRef::operator <(const SymbolRef &Other) const { + return SymbolPimpl < Other.SymbolPimpl; +} + inline error_code SymbolRef::getNext(SymbolRef &Result) const { return OwningObject->getSymbolNext(SymbolPimpl, Result); } @@ -323,8 +415,8 @@ inline error_code SymbolRef::getAddress(uint64_t &Result) const { return OwningObject->getSymbolAddress(SymbolPimpl, Result); } -inline error_code SymbolRef::getOffset(uint64_t &Result) const { - return OwningObject->getSymbolOffset(SymbolPimpl, Result); +inline error_code SymbolRef::getFileOffset(uint64_t &Result) const { + return OwningObject->getSymbolFileOffset(SymbolPimpl, Result); } inline error_code SymbolRef::getSize(uint64_t &Result) const { @@ -335,15 +427,15 @@ inline error_code SymbolRef::getNMTypeChar(char &Result) const { return OwningObject->getSymbolNMTypeChar(SymbolPimpl, Result); } -inline error_code SymbolRef::isInternal(bool &Result) const { - return OwningObject->isSymbolInternal(SymbolPimpl, Result); +inline error_code SymbolRef::getFlags(uint32_t &Result) const { + return OwningObject->getSymbolFlags(SymbolPimpl, Result); } -inline error_code SymbolRef::isGlobal(bool &Result) const { - return OwningObject->isSymbolGlobal(SymbolPimpl, Result); +inline error_code SymbolRef::getSection(section_iterator &Result) const { + return OwningObject->getSymbolSection(SymbolPimpl, Result); } -inline error_code SymbolRef::getSymbolType(SymbolRef::SymbolType &Result) const { +inline error_code SymbolRef::getType(SymbolRef::Type &Result) const { return OwningObject->getSymbolType(SymbolPimpl, Result); } @@ -362,6 +454,10 @@ inline bool SectionRef::operator==(const SectionRef &Other) const { return SectionPimpl == Other.SectionPimpl; } +inline bool SectionRef::operator <(const SectionRef &Other) const { + return SectionPimpl < Other.SectionPimpl; +} + inline error_code SectionRef::getNext(SectionRef &Result) const { return OwningObject->getSectionNext(SectionPimpl, Result); } @@ -398,6 +494,18 @@ inline error_code SectionRef::isBSS(bool &Result) const { return OwningObject->isSectionBSS(SectionPimpl, Result); } +inline error_code SectionRef::isRequiredForExecution(bool &Result) const { + return OwningObject->isSectionRequiredForExecution(SectionPimpl, Result); +} + +inline error_code SectionRef::isVirtual(bool &Result) const { + return OwningObject->isSectionVirtual(SectionPimpl, Result); +} + +inline error_code SectionRef::isZeroInit(bool &Result) const { + return OwningObject->isSectionZeroInit(SectionPimpl, Result); +} + inline error_code SectionRef::containsSymbol(SymbolRef S, bool &Result) const { return OwningObject->sectionContainsSymbol(SectionPimpl, S.SymbolPimpl, Result); @@ -411,6 +519,9 @@ inline relocation_iterator SectionRef::end_relocations() const { return OwningObject->getSectionRelEnd(SectionPimpl); } +inline DataRefImpl SectionRef::getRawDataRefImpl() const { + return SectionPimpl; +} /// RelocationRef inline RelocationRef::RelocationRef(DataRefImpl RelocationP, @@ -430,11 +541,15 @@ inline error_code RelocationRef::getAddress(uint64_t &Result) const { return 
OwningObject->getRelocationAddress(RelocationPimpl, Result); } +inline error_code RelocationRef::getOffset(uint64_t &Result) const { + return OwningObject->getRelocationOffset(RelocationPimpl, Result); +} + inline error_code RelocationRef::getSymbol(SymbolRef &Result) const { return OwningObject->getRelocationSymbol(RelocationPimpl, Result); } -inline error_code RelocationRef::getType(uint32_t &Result) const { +inline error_code RelocationRef::getType(uint64_t &Result) const { return OwningObject->getRelocationType(RelocationPimpl, Result); } @@ -452,6 +567,30 @@ inline error_code RelocationRef::getValueString(SmallVectorImpl &Result) return OwningObject->getRelocationValueString(RelocationPimpl, Result); } +inline error_code RelocationRef::getHidden(bool &Result) const { + return OwningObject->getRelocationHidden(RelocationPimpl, Result); +} +// Inline function definitions. +inline LibraryRef::LibraryRef(DataRefImpl LibraryP, const ObjectFile *Owner) + : LibraryPimpl(LibraryP) + , OwningObject(Owner) {} + +inline bool LibraryRef::operator==(const LibraryRef &Other) const { + return LibraryPimpl == Other.LibraryPimpl; +} + +inline bool LibraryRef::operator <(const LibraryRef &Other) const { + return LibraryPimpl < Other.LibraryPimpl; +} + +inline error_code LibraryRef::getNext(LibraryRef &Result) const { + return OwningObject->getLibraryNext(LibraryPimpl, Result); +} + +inline error_code LibraryRef::getPath(StringRef &Result) const { + return OwningObject->getLibraryPath(LibraryPimpl, Result); +} + } // end namespace object } // end namespace llvm diff --git a/contrib/llvm/include/llvm/Operator.h b/contrib/llvm/include/llvm/Operator.h index 48a5796383b4..abd6a1939d71 100644 --- a/contrib/llvm/include/llvm/Operator.h +++ b/contrib/llvm/include/llvm/Operator.h @@ -261,8 +261,8 @@ class GEPOperator /// getPointerOperandType - Method to return the pointer operand as a /// PointerType. - PointerType *getPointerOperandType() const { - return reinterpret_cast(getPointerOperand()->getType()); + Type *getPointerOperandType() const { + return getPointerOperand()->getType(); } unsigned getNumIndices() const { // Note: always non-negative diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h index 04dd8b60547a..888537daa425 100644 --- a/contrib/llvm/include/llvm/Pass.h +++ b/contrib/llvm/include/llvm/Pass.h @@ -53,7 +53,7 @@ typedef const void* AnalysisID; /// Ordering of pass manager types is important here. enum PassManagerType { PMT_Unknown = 0, - PMT_ModulePassManager = 1, ///< MPPassManager + PMT_ModulePassManager = 1, ///< MPPassManager PMT_CallGraphPassManager, ///< CGPassManager PMT_FunctionPassManager, ///< FPPassManager PMT_LoopPassManager, ///< LPPassManager @@ -84,14 +84,14 @@ class Pass { PassKind Kind; void operator=(const Pass&); // DO NOT IMPLEMENT Pass(const Pass &); // DO NOT IMPLEMENT - + public: - explicit Pass(PassKind K, char &pid); + explicit Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { } virtual ~Pass(); - + PassKind getPassKind() const { return Kind; } - + /// getPassName - Return a nice clean name for a pass. This usually /// implemented in terms of the name that is registered by one of the /// Registration templates, but can be overloaded directly. @@ -99,7 +99,7 @@ class Pass { virtual const char *getPassName() const; /// getPassID - Return the PassID number that corresponds to this pass. 
- virtual AnalysisID getPassID() const { + AnalysisID getPassID() const { return PassID; } @@ -119,12 +119,12 @@ class Pass { const std::string &Banner) const = 0; /// Each pass is responsible for assigning a pass manager to itself. - /// PMS is the stack of available pass manager. - virtual void assignPassManager(PMStack &, + /// PMS is the stack of available pass manager. + virtual void assignPassManager(PMStack &, PassManagerType) {} /// Check if available pass managers are suitable for this pass or not. virtual void preparePassManager(PMStack &); - + /// Return what kind of Pass Manager can manage this pass. virtual PassManagerType getPotentialPassManagerType() const; @@ -159,9 +159,9 @@ class Pass { virtual void *getAdjustedAnalysisPointer(AnalysisID ID); virtual ImmutablePass *getAsImmutablePass(); virtual PMDataManager *getAsPMDataManager(); - + /// verifyAnalysis() - This member can be implemented by a analysis pass to - /// check state of analysis information. + /// check state of analysis information. virtual void verifyAnalysis() const; // dumpPassStructure - Implement the -debug-passes=PassStructure option @@ -175,6 +175,10 @@ class Pass { // argument string, or null if it is not known. static const PassInfo *lookupPassInfo(StringRef Arg); + // createPass - Create a object for the specified pass class, + // or null if it is not known. + static Pass *createPass(AnalysisID ID); + /// getAnalysisIfAvailable() - Subclasses use this function to /// get analysis information that might be around, for example to update it. /// This is different than getAnalysis in that it can fail (if the analysis @@ -226,7 +230,7 @@ class ModulePass : public Pass { /// being operated on. virtual bool runOnModule(Module &M) = 0; - virtual void assignPassManager(PMStack &PMS, + virtual void assignPassManager(PMStack &PMS, PassManagerType T); /// Return what kind of Pass Manager can manage this pass. @@ -259,9 +263,9 @@ class ImmutablePass : public ModulePass { /// bool runOnModule(Module &) { return false; } - explicit ImmutablePass(char &pid) + explicit ImmutablePass(char &pid) : ModulePass(pid) {} - + // Force out-of-line virtual method. virtual ~ImmutablePass(); }; @@ -286,7 +290,7 @@ class FunctionPass : public Pass { /// any necessary per-module initialization. /// virtual bool doInitialization(Module &); - + /// runOnFunction - Virtual method overriden by subclasses to do the /// per-function processing of the pass. /// @@ -297,7 +301,7 @@ class FunctionPass : public Pass { /// virtual bool doFinalization(Module &); - virtual void assignPassManager(PMStack &PMS, + virtual void assignPassManager(PMStack &PMS, PassManagerType T); /// Return what kind of Pass Manager can manage this pass. @@ -348,7 +352,7 @@ class BasicBlockPass : public Pass { /// virtual bool doFinalization(Module &); - virtual void assignPassManager(PMStack &PMS, + virtual void assignPassManager(PMStack &PMS, PassManagerType T); /// Return what kind of Pass Manager can manage this pass. 
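
The Pass interface touched above (the constructor taking the pass ID character, getPassName(), and the now non-virtual getPassID()) is consumed by subclasses through the usual static-ID pattern: the address of a static char serves as the AnalysisID. A bare-bones FunctionPass along those lines; the pass name and behavior are invented purely for illustration:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    namespace {
      struct CountBasicBlocks : public FunctionPass {
        static char ID;  // only the address matters, not the value
        CountBasicBlocks() : FunctionPass(ID) {}

        virtual const char *getPassName() const {
          return "Count basic blocks (illustrative)";
        }

        virtual bool runOnFunction(Function &F) {
          errs() << F.getName() << ": " << F.size() << " blocks\n";
          return false;  // the IR was not modified
        }
      };
    }

    char CountBasicBlocks::ID = 0;
    static RegisterPass<CountBasicBlocks>
      X("count-bbs", "Count basic blocks (illustrative)", false, false);
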
diff --git a/contrib/llvm/include/llvm/PassAnalysisSupport.h b/contrib/llvm/include/llvm/PassAnalysisSupport.h index fede1216c3c4..5c6a2d7a92f9 100644 --- a/contrib/llvm/include/llvm/PassAnalysisSupport.h +++ b/contrib/llvm/include/llvm/PassAnalysisSupport.h @@ -19,6 +19,7 @@ #ifndef LLVM_PASS_ANALYSIS_SUPPORT_H #define LLVM_PASS_ANALYSIS_SUPPORT_H +#include "llvm/Pass.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include diff --git a/contrib/llvm/include/llvm/PassManager.h b/contrib/llvm/include/llvm/PassManager.h index c8b5dcaf0f2d..ce5fda79f9c7 100644 --- a/contrib/llvm/include/llvm/PassManager.h +++ b/contrib/llvm/include/llvm/PassManager.h @@ -53,17 +53,13 @@ class PassManager : public PassManagerBase { /// will be destroyed as well, so there is no need to delete the pass. This /// implies that all passes MUST be allocated with 'new'. void add(Pass *P); - + /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool run(Module &M); private: - /// addImpl - Add a pass to the queue of passes to run, without - /// checking whether to add a printer pass. - void addImpl(Pass *P); - - /// PassManagerImpl_New is the actual class. PassManager is just the + /// PassManagerImpl_New is the actual class. PassManager is just the /// wraper to publish simple pass manager interface PassManagerImpl *PM; }; @@ -75,11 +71,11 @@ class FunctionPassManager : public PassManagerBase { /// but does not take ownership of, the specified Module. explicit FunctionPassManager(Module *M); ~FunctionPassManager(); - + /// add - Add a pass to the queue of passes to run. This passes /// ownership of the Pass to the PassManager. When the /// PassManager_X is destroyed, the pass will be destroyed as well, so - /// there is no need to delete the pass. (TODO delete passes.) + /// there is no need to delete the pass. /// This implies that all passes MUST be allocated with 'new'. void add(Pass *P); @@ -88,20 +84,16 @@ class FunctionPassManager : public PassManagerBase { /// so, return true. /// bool run(Function &F); - + /// doInitialization - Run all of the initializers for the function passes. /// bool doInitialization(); - + /// doFinalization - Run all of the finalizers for the function passes. /// bool doFinalization(); - -private: - /// addImpl - Add a pass to the queue of passes to run, without - /// checking whether to add a printer pass. - void addImpl(Pass *P); +private: FunctionPassManagerImpl *FPM; Module *M; }; diff --git a/contrib/llvm/include/llvm/PassManagers.h b/contrib/llvm/include/llvm/PassManagers.h index c05347da7934..fa29f50ccf77 100644 --- a/contrib/llvm/include/llvm/PassManagers.h +++ b/contrib/llvm/include/llvm/PassManagers.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file declares the LLVM Pass Manager infrastructure. +// This file declares the LLVM Pass Manager infrastructure. // //===----------------------------------------------------------------------===// @@ -24,11 +24,11 @@ //===----------------------------------------------------------------------===// // Overview: // The Pass Manager Infrastructure manages passes. 
It's responsibilities are: -// +// // o Manage optimization pass execution order // o Make required Analysis information available before pass P is run // o Release memory occupied by dead passes -// o If Analysis information is dirtied by a pass then regenerate Analysis +// o If Analysis information is dirtied by a pass then regenerate Analysis // information before it is consumed by another pass. // // Pass Manager Infrastructure uses multiple pass managers. They are @@ -43,13 +43,13 @@ // // [o] class PMTopLevelManager; // -// Two top level managers, PassManager and FunctionPassManager, derive from -// PMTopLevelManager. PMTopLevelManager manages information used by top level +// Two top level managers, PassManager and FunctionPassManager, derive from +// PMTopLevelManager. PMTopLevelManager manages information used by top level // managers such as last user info. // // [o] class PMDataManager; // -// PMDataManager manages information, e.g. list of available analysis info, +// PMDataManager manages information, e.g. list of available analysis info, // used by a pass manager to manage execution order of passes. It also provides // a place to implement common pass manager APIs. All pass managers derive from // PMDataManager. @@ -82,7 +82,7 @@ // relies on PassManagerImpl to do all the tasks. // // [o] class PassManagerImpl : public Pass, public PMDataManager, -// public PMDTopLevelManager +// public PMTopLevelManager // // PassManagerImpl is a top level pass manager responsible for managing // MPPassManagers. @@ -109,7 +109,7 @@ enum PassDebuggingString { ON_REGION_MSG, // " 'on Region ...\n'" ON_LOOP_MSG, // " 'on Loop ...\n'" ON_CG_MSG // "' on Call Graph ...\n'" -}; +}; /// PassManagerPrettyStackEntry - This is used to print informative information /// about what pass is running when/if a stack trace is generated. @@ -124,19 +124,19 @@ class PassManagerPrettyStackEntry : public PrettyStackTraceEntry { : P(p), V(&v), M(0) {} // When P is run on V PassManagerPrettyStackEntry(Pass *p, Module &m) : P(p), V(0), M(&m) {} // When P is run on M - + /// print - Emit information about this stack frame to OS. virtual void print(raw_ostream &OS) const; }; - - + + //===----------------------------------------------------------------------===// // PMStack // /// PMStack - This class implements a stack data structure of PMDataManager /// pointers. /// -/// Top level pass managers (see PassManager.cpp) maintain active Pass Managers +/// Top level pass managers (see PassManager.cpp) maintain active Pass Managers /// using PMStack. Each Pass implements assignPassManager() to connect itself /// with appropriate manager. assignPassManager() walks PMStack to find /// suitable manager. @@ -174,9 +174,8 @@ class PMTopLevelManager { void initializeAllAnalysisInfo(); private: - /// This is implemented by top level pass manager and used by - /// schedulePass() to add analysis info passes that are not available. - virtual void addTopLevelPass(Pass *P) = 0; + virtual PMDataManager *getAsPMDataManager() = 0; + virtual PassManagerType getTopLevelPassManagerType() = 0; public: /// Schedule pass P for execution. Make sure that passes required by @@ -198,7 +197,7 @@ class PMTopLevelManager { /// Find analysis usage information for the pass P. AnalysisUsage *findAnalysisUsage(Pass *P); - virtual ~PMTopLevelManager(); + virtual ~PMTopLevelManager(); /// Add immutable pass and initialize it. 
inline void addImmutablePass(ImmutablePass *P) { @@ -228,7 +227,7 @@ class PMTopLevelManager { PMStack activeStack; protected: - + /// Collection of pass managers SmallVector PassManagers; @@ -254,7 +253,7 @@ class PMTopLevelManager { }; - + //===----------------------------------------------------------------------===// // PMDataManager @@ -268,7 +267,7 @@ class PMDataManager { } virtual ~PMDataManager(); - + virtual Pass *getAsPass() = 0; /// Augment AvailableAnalysis by adding analysis made available by pass P. @@ -279,16 +278,16 @@ class PMDataManager { /// Remove Analysis that is not preserved by the pass void removeNotPreservedAnalysis(Pass *P); - + /// Remove dead passes used by P. - void removeDeadPasses(Pass *P, StringRef Msg, + void removeDeadPasses(Pass *P, StringRef Msg, enum PassDebuggingString); /// Remove P. - void freePass(Pass *P, StringRef Msg, + void freePass(Pass *P, StringRef Msg, enum PassDebuggingString); - /// Add pass P into the PassVector. Update + /// Add pass P into the PassVector. Update /// AvailableAnalysis appropriately if ProcessAnalysis is true. void add(Pass *P, bool ProcessAnalysis = true); @@ -300,7 +299,7 @@ class PMDataManager { virtual Pass *getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F); /// Initialize available analysis information. - void initializeAnalysisInfo() { + void initializeAnalysisInfo() { AvailableAnalysis.clear(); for (unsigned i = 0; i < PMT_Last; ++i) InheritedAnalysis[i] = NULL; @@ -347,9 +346,9 @@ class PMDataManager { return (unsigned)PassVector.size(); } - virtual PassManagerType getPassManagerType() const { + virtual PassManagerType getPassManagerType() const { assert ( 0 && "Invalid use of getPassManagerType"); - return PMT_Unknown; + return PMT_Unknown; } std::map *getAvailableAnalysis() { @@ -377,17 +376,17 @@ class PMDataManager { // then PMT_Last active pass mangers. std::map *InheritedAnalysis[PMT_Last]; - + /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions /// or higher is specified. bool isPassDebuggingExecutionsOrMore() const; - + private: void dumpAnalysisUsage(StringRef Msg, const Pass *P, const AnalysisUsage::VectorType &Set) const; - // Set of available Analysis. This information is used while scheduling - // pass. If a pass requires an analysis which is not available then + // Set of available Analysis. This information is used while scheduling + // pass. If a pass requires an analysis which is not available then // the required analysis pass is scheduled to run before the pass itself is // scheduled to run. std::map AvailableAnalysis; @@ -403,27 +402,27 @@ class PMDataManager { // FPPassManager // /// FPPassManager manages BBPassManagers and FunctionPasses. -/// It batches all function passes and basic block pass managers together and -/// sequence them to process one function at a time before processing next +/// It batches all function passes and basic block pass managers together and +/// sequence them to process one function at a time before processing next /// function. class FPPassManager : public ModulePass, public PMDataManager { public: static char ID; - explicit FPPassManager() + explicit FPPassManager() : ModulePass(ID), PMDataManager() { } - + /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool runOnFunction(Function &F); bool runOnModule(Module &M); - + /// cleanup - After running all passes, clean up pass manager cache. 
void cleanup(); /// doInitialization - Run all of the initializers for the function passes. /// bool doInitialization(Module &M); - + /// doFinalization - Run all of the finalizers for the function passes. /// bool doFinalization(Module &M); @@ -449,8 +448,8 @@ class FPPassManager : public ModulePass, public PMDataManager { return FP; } - virtual PassManagerType getPassManagerType() const { - return PMT_FunctionPassManager; + virtual PassManagerType getPassManagerType() const { + return PMT_FunctionPassManager; } }; diff --git a/contrib/llvm/include/llvm/PassSupport.h b/contrib/llvm/include/llvm/PassSupport.h index 082790956c46..c50c2cc184e3 100644 --- a/contrib/llvm/include/llvm/PassSupport.h +++ b/contrib/llvm/include/llvm/PassSupport.h @@ -25,6 +25,7 @@ #include "llvm/PassRegistry.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/Valgrind.h" #include namespace llvm { @@ -135,7 +136,10 @@ class PassInfo { if (old_val == 0) { \ function(Registry); \ sys::MemoryFence(); \ + TsanIgnoreWritesBegin(); \ + TsanHappensBefore(&initialized); \ initialized = 2; \ + TsanIgnoreWritesEnd(); \ } else { \ sys::cas_flag tmp = initialized; \ sys::MemoryFence(); \ @@ -143,7 +147,8 @@ class PassInfo { tmp = initialized; \ sys::MemoryFence(); \ } \ - } + } \ + TsanHappensAfter(&initialized); #define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \ static void* initialize##passName##PassOnce(PassRegistry &Registry) { \ diff --git a/contrib/llvm/include/llvm/Support/BlockFrequency.h b/contrib/llvm/include/llvm/Support/BlockFrequency.h index 554b7845696d..839cf9371247 100644 --- a/contrib/llvm/include/llvm/Support/BlockFrequency.h +++ b/contrib/llvm/include/llvm/Support/BlockFrequency.h @@ -14,6 +14,8 @@ #ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H #define LLVM_SUPPORT_BLOCKFREQUENCY_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class raw_ostream; diff --git a/contrib/llvm/include/llvm/Support/BranchProbability.h b/contrib/llvm/include/llvm/Support/BranchProbability.h index 05c24d4fcfcb..eedf69247ef5 100644 --- a/contrib/llvm/include/llvm/Support/BranchProbability.h +++ b/contrib/llvm/include/llvm/Support/BranchProbability.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_BRANCHPROBABILITY_H #include "llvm/Support/DataTypes.h" +#include namespace llvm { @@ -22,7 +23,6 @@ class raw_ostream; // This class represents Branch Probability as a non-negative fraction. class BranchProbability { - // Numerator uint32_t N; @@ -30,19 +30,44 @@ class BranchProbability { uint32_t D; public: - BranchProbability(uint32_t n, uint32_t d); + BranchProbability(uint32_t n, uint32_t d) : N(n), D(d) { + assert(d > 0 && "Denomiator cannot be 0!"); + assert(n <= d && "Probability cannot be bigger than 1!"); + } + + static BranchProbability getZero() { return BranchProbability(0, 1); } + static BranchProbability getOne() { return BranchProbability(1, 1); } uint32_t getNumerator() const { return N; } uint32_t getDenominator() const { return D; } // Return (1 - Probability). 
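// A small illustrative sketch of the BranchProbability interface above; the
// comparison operators added below cross-multiply, so 1/2 and 2/4 compare
// equal.  Header paths are assumed from the LLVM 3.1 layout.
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/raw_ostream.h"

static void branchProbabilityDemo() {
  llvm::BranchProbability Taken(3, 8);                  // asserts N <= D, D > 0
  llvm::BranchProbability NotTaken = Taken.getCompl();  // 5/8
  if (Taken < NotTaken)
    llvm::outs() << "colder edge: " << Taken << "\n";
  if (llvm::BranchProbability(1, 2) == llvm::BranchProbability(2, 4))
    llvm::outs() << "equal after cross-multiplication\n";
}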
- BranchProbability getCompl() { + BranchProbability getCompl() const { return BranchProbability(D - N, D); } void print(raw_ostream &OS) const; void dump() const; + + bool operator==(BranchProbability RHS) const { + return (uint64_t)N * RHS.D == (uint64_t)D * RHS.N; + } + bool operator!=(BranchProbability RHS) const { + return !(*this == RHS); + } + bool operator<(BranchProbability RHS) const { + return (uint64_t)N * RHS.D < (uint64_t)D * RHS.N; + } + bool operator>(BranchProbability RHS) const { + return RHS < *this; + } + bool operator<=(BranchProbability RHS) const { + return (uint64_t)N * RHS.D <= (uint64_t)D * RHS.N; + } + bool operator>=(BranchProbability RHS) const { + return RHS <= *this; + } }; raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob); diff --git a/contrib/llvm/include/llvm/Support/CFG.h b/contrib/llvm/include/llvm/Support/CFG.h index 29313ef90099..f5dc8ea055a3 100644 --- a/contrib/llvm/include/llvm/Support/CFG.h +++ b/contrib/llvm/include/llvm/Support/CFG.h @@ -71,6 +71,12 @@ class PredIterator : public std::iterator pred_iterator; @@ -314,6 +320,7 @@ template <> struct GraphTraits : public GraphTraits { typedef Function::iterator nodes_iterator; static nodes_iterator nodes_begin(Function *F) { return F->begin(); } static nodes_iterator nodes_end (Function *F) { return F->end(); } + static unsigned size (Function *F) { return F->size(); } }; template <> struct GraphTraits : public GraphTraits { @@ -323,6 +330,7 @@ template <> struct GraphTraits : typedef Function::const_iterator nodes_iterator; static nodes_iterator nodes_begin(const Function *F) { return F->begin(); } static nodes_iterator nodes_end (const Function *F) { return F->end(); } + static unsigned size (const Function *F) { return F->size(); } }; diff --git a/contrib/llvm/include/llvm/Support/COFF.h b/contrib/llvm/include/llvm/Support/COFF.h index 673925593e6a..88c60bac7402 100644 --- a/contrib/llvm/include/llvm/Support/COFF.h +++ b/contrib/llvm/include/llvm/Support/COFF.h @@ -24,6 +24,7 @@ #define LLVM_SUPPORT_WIN_COFF_H #include "llvm/Support/DataTypes.h" +#include #include namespace llvm { @@ -49,8 +50,65 @@ namespace COFF { }; enum MachineTypes { - IMAGE_FILE_MACHINE_I386 = 0x14C, - IMAGE_FILE_MACHINE_AMD64 = 0x8664 + IMAGE_FILE_MACHINE_UNKNOWN = 0x0, + IMAGE_FILE_MACHINE_AM33 = 0x13, + IMAGE_FILE_MACHINE_AMD64 = 0x8664, + IMAGE_FILE_MACHINE_ARM = 0x1C0, + IMAGE_FILE_MACHINE_ARMV7 = 0x1C4, + IMAGE_FILE_MACHINE_EBC = 0xEBC, + IMAGE_FILE_MACHINE_I386 = 0x14C, + IMAGE_FILE_MACHINE_IA64 = 0x200, + IMAGE_FILE_MACHINE_M32R = 0x9041, + IMAGE_FILE_MACHINE_MIPS16 = 0x266, + IMAGE_FILE_MACHINE_MIPSFPU = 0x366, + IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, + IMAGE_FILE_MACHINE_POWERPC = 0x1F0, + IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, + IMAGE_FILE_MACHINE_R4000 = 0x166, + IMAGE_FILE_MACHINE_SH3 = 0x1A2, + IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, + IMAGE_FILE_MACHINE_SH4 = 0x1A6, + IMAGE_FILE_MACHINE_SH5 = 0x1A8, + IMAGE_FILE_MACHINE_THUMB = 0x1C2, + IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 + }; + + enum Characteristics { + /// The file does not contain base relocations and must be loaded at its + /// preferred base. If this cannot be done, the loader will error. + IMAGE_FILE_RELOCS_STRIPPED = 0x0001, + /// The file is valid and can be run. + IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002, + /// COFF line numbers have been stripped. This is deprecated and should be + /// 0. + IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004, + /// COFF symbol table entries for local symbols have been removed. 
This is + /// deprecated and should be 0. + IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008, + /// Aggressively trim working set. This is deprecated and must be 0. + IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010, + /// Image can handle > 2GiB addresses. + IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020, + /// Little endian: the LSB precedes the MSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_LO = 0x0080, + /// Machine is based on a 32bit word architecture. + IMAGE_FILE_32BIT_MACHINE = 0x0100, + /// Debugging info has been removed. + IMAGE_FILE_DEBUG_STRIPPED = 0x0200, + /// If the image is on removable media, fully load it and copy it to swap. + IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400, + /// If the image is on network media, fully load it and copy it to swap. + IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800, + /// The image file is a system file, not a user program. + IMAGE_FILE_SYSTEM = 0x1000, + /// The image file is a DLL. + IMAGE_FILE_DLL = 0x2000, + /// This file should only be run on a uniprocessor machine. + IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000, + /// Big endian: the MSB precedes the LSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 }; struct symbol { @@ -231,6 +289,24 @@ namespace COFF { IMAGE_REL_AMD64_SSPAN32 = 0x0010 }; + enum RelocationTypesARM { + IMAGE_REL_ARM_ABSOLUTE = 0x0000, + IMAGE_REL_ARM_ADDR32 = 0x0001, + IMAGE_REL_ARM_ADDR32NB = 0x0002, + IMAGE_REL_ARM_BRANCH24 = 0x0003, + IMAGE_REL_ARM_BRANCH11 = 0x0004, + IMAGE_REL_ARM_TOKEN = 0x0005, + IMAGE_REL_ARM_BLX24 = 0x0008, + IMAGE_REL_ARM_BLX11 = 0x0009, + IMAGE_REL_ARM_SECTION = 0x000E, + IMAGE_REL_ARM_SECREL = 0x000F, + IMAGE_REL_ARM_MOV32A = 0x0010, + IMAGE_REL_ARM_MOV32T = 0x0011, + IMAGE_REL_ARM_BRANCH20T = 0x0012, + IMAGE_REL_ARM_BRANCH24T = 0x0014, + IMAGE_REL_ARM_BLX23T = 0x0015 + }; + enum COMDATType { IMAGE_COMDAT_SELECT_NODUPLICATES = 1, IMAGE_COMDAT_SELECT_ANY, @@ -292,7 +368,219 @@ namespace COFF { AuxiliarySectionDefinition SectionDefinition; }; -} // End namespace llvm. + /// @brief The Import Directory Table. + /// + /// There is a single array of these and one entry per imported DLL. + struct ImportDirectoryTableEntry { + uint32_t ImportLookupTableRVA; + uint32_t TimeDateStamp; + uint32_t ForwarderChain; + uint32_t NameRVA; + uint32_t ImportAddressTableRVA; + }; + + /// @brief The PE32 Import Lookup Table. + /// + /// There is an array of these for each imported DLL. It represents either + /// the ordinal to import from the target DLL, or a name to lookup and import + /// from the target DLL. + /// + /// This also happens to be the same format used by the Import Address Table + /// when it is initially written out to the image. + struct ImportLookupTableEntry32 { + uint32_t data; + + /// @brief Is this entry specified by ordinal, or name? + bool isOrdinal() const { return data & 0x80000000; } + + /// @brief Get the ordinal value of this entry. isOrdinal must be true. + uint16_t getOrdinal() const { + assert(isOrdinal() && "ILT entry is not an ordinal!"); + return data & 0xFFFF; + } + + /// @brief Set the ordinal value and set isOrdinal to true. + void setOrdinal(uint16_t o) { + data = o; + data |= 0x80000000; + } + + /// @brief Get the Hint/Name entry RVA. isOrdinal must be false. + uint32_t getHintNameRVA() const { + assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!"); + return data; + } + + /// @brief Set the Hint/Name entry RVA and set isOrdinal to false. 
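// An illustrative decode of a PE32 import lookup table entry using the
// accessors above; the function name and output format are made up.
#include "llvm/Support/COFF.h"
#include "llvm/Support/raw_ostream.h"

static void dumpILTEntry(llvm::COFF::ImportLookupTableEntry32 E) {
  if (E.isOrdinal())
    llvm::outs() << "import by ordinal " << E.getOrdinal() << "\n";
  else
    llvm::outs() << "hint/name table at RVA " << E.getHintNameRVA() << "\n";
}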
+ void setHintNameRVA(uint32_t rva) { data = rva; } + }; + + /// @brief The DOS compatible header at the front of all PEs. + struct DOSHeader { + uint16_t Magic; + uint16_t UsedBytesInTheLastPage; + uint16_t FileSizeInPages; + uint16_t NumberOfRelocationItems; + uint16_t HeaderSizeInParagraphs; + uint16_t MinimumExtraParagraphs; + uint16_t MaximumExtraParagraphs; + uint16_t InitialRelativeSS; + uint16_t InitialSP; + uint16_t Checksum; + uint16_t InitialIP; + uint16_t InitialRelativeCS; + uint16_t AddressOfRelocationTable; + uint16_t OverlayNumber; + uint16_t Reserved[4]; + uint16_t OEMid; + uint16_t OEMinfo; + uint16_t Reserved2[10]; + uint32_t AddressOfNewExeHeader; + }; + + struct PEHeader { + uint32_t Signature; + header COFFHeader; + uint16_t Magic; + uint8_t MajorLinkerVersion; + uint8_t MinorLinkerVersion; + uint32_t SizeOfCode; + uint32_t SizeOfInitializedData; + uint32_t SizeOfUninitializedData; + uint32_t AddressOfEntryPoint; // RVA + uint32_t BaseOfCode; // RVA + uint32_t BaseOfData; // RVA + uint64_t ImageBase; + uint32_t SectionAlignment; + uint32_t FileAlignment; + uint16_t MajorOperatingSystemVersion; + uint16_t MinorOperatingSystemVersion; + uint16_t MajorImageVersion; + uint16_t MinorImageVersion; + uint16_t MajorSubsystemVersion; + uint16_t MinorSubsystemVersion; + uint32_t Win32VersionValue; + uint32_t SizeOfImage; + uint32_t SizeOfHeaders; + uint32_t CheckSum; + uint16_t Subsystem; + uint16_t DLLCharacteristics; + uint64_t SizeOfStackReserve; + uint64_t SizeOfStackCommit; + uint64_t SizeOfHeapReserve; + uint64_t SizeOfHeapCommit; + uint32_t LoaderFlags; + uint32_t NumberOfRvaAndSize; + }; + + struct DataDirectory { + uint32_t RelativeVirtualAddress; + uint32_t Size; + }; + + enum WindowsSubsystem { + IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. + IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes + IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. + IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem. + IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem. + IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE. + IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application. + IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot + /// services. + IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time + /// services. + IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image. + IMAGE_SUBSYSTEM_XBOX = 14 ///< XBOX. + }; + + enum DLLCharacteristics { + /// DLL can be relocated at load time. + IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040, + /// Code integrity checks are enforced. + IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080, + IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, ///< Image is NX compatible. + /// Isolation aware, but do not isolate the image. + IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200, + /// Does not use structured exception handling (SEH). No SEH handler may be + /// called in this image. + IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400, + /// Do not bind the image. + IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800, + IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, ///< A WDM driver. + /// Terminal Server aware. 
+ IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 + }; + + enum DebugType { + IMAGE_DEBUG_TYPE_UNKNOWN = 0, + IMAGE_DEBUG_TYPE_COFF = 1, + IMAGE_DEBUG_TYPE_CODEVIEW = 2, + IMAGE_DEBUG_TYPE_FPO = 3, + IMAGE_DEBUG_TYPE_MISC = 4, + IMAGE_DEBUG_TYPE_EXCEPTION = 5, + IMAGE_DEBUG_TYPE_FIXUP = 6, + IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7, + IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8, + IMAGE_DEBUG_TYPE_BORLAND = 9, + IMAGE_DEBUG_TYPE_CLSID = 11 + }; + + enum BaseRelocationType { + IMAGE_REL_BASED_ABSOLUTE = 0, + IMAGE_REL_BASED_HIGH = 1, + IMAGE_REL_BASED_LOW = 2, + IMAGE_REL_BASED_HIGHLOW = 3, + IMAGE_REL_BASED_HIGHADJ = 4, + IMAGE_REL_BASED_MIPS_JMPADDR = 5, + IMAGE_REL_BASED_ARM_MOV32A = 5, + IMAGE_REL_BASED_ARM_MOV32T = 7, + IMAGE_REL_BASED_MIPS_JMPADDR16 = 9, + IMAGE_REL_BASED_DIR64 = 10 + }; + + enum ImportType { + IMPORT_CODE = 0, + IMPORT_DATA = 1, + IMPORT_CONST = 2 + }; + + enum ImportNameType { + /// Import is by ordinal. This indicates that the value in the Ordinal/Hint + /// field of the import header is the import's ordinal. If this constant is + /// not specified, then the Ordinal/Hint field should always be interpreted + /// as the import's hint. + IMPORT_ORDINAL = 0, + /// The import name is identical to the public symbol name + IMPORT_NAME = 1, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _. + IMPORT_NAME_NOPREFIX = 2, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _, and truncating at the first @. + IMPORT_NAME_UNDECORATE = 3 + }; + + struct ImportHeader { + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint32_t SizeOfData; + uint16_t OrdinalHint; + uint16_t TypeInfo; + + ImportType getType() const { + return static_cast(TypeInfo & 0x3); + } + + ImportNameType getNameType() const { + return static_cast((TypeInfo & 0x1C) >> 3); + } + }; + } // End namespace COFF. +} // End namespace llvm. #endif diff --git a/contrib/llvm/include/llvm/Support/CallSite.h b/contrib/llvm/include/llvm/Support/CallSite.h index 04b8c4e69c52..20634ede7644 100644 --- a/contrib/llvm/include/llvm/Support/CallSite.h +++ b/contrib/llvm/include/llvm/Support/CallSite.h @@ -237,6 +237,16 @@ class CallSiteBase { #undef CALLSITE_DELEGATE_GETTER #undef CALLSITE_DELEGATE_SETTER + /// @brief Determine whether this argument is not captured. + bool doesNotCapture(unsigned ArgNo) const { + return paramHasAttr(ArgNo + 1, Attribute::NoCapture); + } + + /// @brief Determine whether this argument is passed by value. + bool isByValArgument(unsigned ArgNo) const { + return paramHasAttr(ArgNo + 1, Attribute::ByVal); + } + /// hasArgument - Returns true if this CallSite passes the given Value* as an /// argument to the called function. 
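// Illustrative sketch of the new argument-attribute helpers on CallSite; the
// wrapper below is made up, and the header path assumes the LLVM 3.1 layout.
#include "llvm/Support/CallSite.h"

// True when the ArgNo'th actual argument is passed as a nocapture byval
// pointer.  The helpers hide the fact that parameter attribute indices are
// 1-based internally.
static bool isNoCaptureByVal(llvm::CallSite CS, unsigned ArgNo) {
  return CS.doesNotCapture(ArgNo) && CS.isByValArgument(ArgNo);
}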
bool hasArgument(const Value *Arg) const { diff --git a/contrib/llvm/include/llvm/Support/Capacity.h b/contrib/llvm/include/llvm/Support/Capacity.h index d8cda43b3576..7460f9825bd3 100644 --- a/contrib/llvm/include/llvm/Support/Capacity.h +++ b/contrib/llvm/include/llvm/Support/Capacity.h @@ -15,6 +15,8 @@ #ifndef LLVM_SUPPORT_CAPACITY_H #define LLVM_SUPPORT_CAPACITY_H +#include + namespace llvm { template diff --git a/contrib/llvm/include/llvm/Support/CodeGen.h b/contrib/llvm/include/llvm/Support/CodeGen.h index 41351dc73f3b..1b66c943895e 100644 --- a/contrib/llvm/include/llvm/Support/CodeGen.h +++ b/contrib/llvm/include/llvm/Support/CodeGen.h @@ -27,6 +27,26 @@ namespace llvm { enum Model { Default, JITDefault, Small, Kernel, Medium, Large }; } + // TLS models. + namespace TLSModel { + enum Model { + GeneralDynamic, + LocalDynamic, + InitialExec, + LocalExec + }; + } + + // Code generation optimization level. + namespace CodeGenOpt { + enum Level { + None, // -O0 + Less, // -O1 + Default, // -O2, -Os + Aggressive // -O3 + }; + } + } // end llvm namespace #endif diff --git a/contrib/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm/include/llvm/Support/CommandLine.h index c6b62a8df9a4..c212d2d59f64 100644 --- a/contrib/llvm/include/llvm/Support/CommandLine.h +++ b/contrib/llvm/include/llvm/Support/CommandLine.h @@ -40,7 +40,7 @@ namespace cl { //===----------------------------------------------------------------------===// // ParseCommandLineOptions - Command line option processing entry point. // -void ParseCommandLineOptions(int argc, char **argv, +void ParseCommandLineOptions(int argc, const char * const *argv, const char *Overview = 0, bool ReadResponseFiles = false); @@ -83,10 +83,10 @@ void MarkOptionsChanged(); // enum NumOccurrencesFlag { // Flags for the number of occurrences allowed - Optional = 0x01, // Zero or One occurrence - ZeroOrMore = 0x02, // Zero or more occurrences allowed - Required = 0x03, // One occurrence required - OneOrMore = 0x04, // One or more occurrences required + Optional = 0x00, // Zero or One occurrence + ZeroOrMore = 0x01, // Zero or more occurrences allowed + Required = 0x02, // One occurrence required + OneOrMore = 0x03, // One or more occurrences required // ConsumeAfter - Indicates that this option is fed anything that follows the // last positional argument required by the application (it is an error if @@ -95,23 +95,20 @@ enum NumOccurrencesFlag { // Flags for the number of occurrences allowed // found. Once a filename is found, all of the succeeding arguments are // passed, unprocessed, to the ConsumeAfter option. // - ConsumeAfter = 0x05, - - OccurrencesMask = 0x07 + ConsumeAfter = 0x04 }; enum ValueExpected { // Is a value required for the option? - ValueOptional = 0x08, // The value can appear... or not - ValueRequired = 0x10, // The value is required to appear! - ValueDisallowed = 0x18, // A value may not be specified (for flags) - ValueMask = 0x18 + // zero reserved for the unspecified value + ValueOptional = 0x01, // The value can appear... or not + ValueRequired = 0x02, // The value is required to appear! 
+ ValueDisallowed = 0x03 // A value may not be specified (for flags) }; enum OptionHidden { // Control whether -help shows this option - NotHidden = 0x20, // Option included in -help & -help-hidden - Hidden = 0x40, // -help doesn't, but -help-hidden does - ReallyHidden = 0x60, // Neither -help nor -help-hidden show this arg - HiddenMask = 0x60 + NotHidden = 0x00, // Option included in -help & -help-hidden + Hidden = 0x01, // -help doesn't, but -help-hidden does + ReallyHidden = 0x02 // Neither -help nor -help-hidden show this arg }; // Formatting flags - This controls special features that the option might have @@ -130,18 +127,16 @@ enum OptionHidden { // Control whether -help shows this option // enum FormattingFlags { - NormalFormatting = 0x000, // Nothing special - Positional = 0x080, // Is a positional argument, no '-' required - Prefix = 0x100, // Can this option directly prefix its value? - Grouping = 0x180, // Can this option group with other options? - FormattingMask = 0x180 // Union of the above flags. + NormalFormatting = 0x00, // Nothing special + Positional = 0x01, // Is a positional argument, no '-' required + Prefix = 0x02, // Can this option directly prefix its value? + Grouping = 0x03 // Can this option group with other options? }; enum MiscFlags { // Miscellaneous flags to adjust argument - CommaSeparated = 0x200, // Should this cl::list split between commas? - PositionalEatsArgs = 0x400, // Should this positional cl::list eat -args? - Sink = 0x800, // Should this cl::list eat all unknown options? - MiscMask = 0xE00 // Union of the above flags. + CommaSeparated = 0x01, // Should this cl::list split between commas? + PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args? + Sink = 0x04 // Should this cl::list eat all unknown options? }; @@ -168,7 +163,15 @@ class Option { virtual void anchor(); int NumOccurrences; // The number of times specified - int Flags; // Flags for the argument + // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid + // problems with signed enums in bitfields. + unsigned Occurrences : 3; // enum NumOccurrencesFlag + // not using the enum type for 'Value' because zero is an implementation + // detail representing the non-value + unsigned Value : 2; + unsigned HiddenFlag : 2; // enum OptionHidden + unsigned Formatting : 2; // enum FormattingFlags + unsigned Misc : 3; unsigned Position; // Position of last occurrence of the option unsigned AdditionalVals;// Greater than 0 for multi-valued option. Option *NextRegistered; // Singly linked list of registered options. @@ -178,21 +181,20 @@ class Option { const char *ValueStr; // String describing what the value of this option is inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { - return static_cast(Flags & OccurrencesMask); + return (enum NumOccurrencesFlag)Occurrences; } inline enum ValueExpected getValueExpectedFlag() const { - int VE = Flags & ValueMask; - return VE ? static_cast(VE) + return Value ? 
((enum ValueExpected)Value) : getValueExpectedFlagDefault(); } inline enum OptionHidden getOptionHiddenFlag() const { - return static_cast(Flags & HiddenMask); + return (enum OptionHidden)HiddenFlag; } inline enum FormattingFlags getFormattingFlag() const { - return static_cast(Flags & FormattingMask); + return (enum FormattingFlags)Formatting; } inline unsigned getMiscFlags() const { - return Flags & MiscMask; + return Misc; } inline unsigned getPosition() const { return Position; } inline unsigned getNumAdditionalVals() const { return AdditionalVals; } @@ -206,27 +208,21 @@ class Option { void setArgStr(const char *S) { ArgStr = S; } void setDescription(const char *S) { HelpStr = S; } void setValueStr(const char *S) { ValueStr = S; } - - void setFlag(unsigned Flag, unsigned FlagMask) { - Flags &= ~FlagMask; - Flags |= Flag; - } - void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { - setFlag(Val, OccurrencesMask); + Occurrences = Val; } - void setValueExpectedFlag(enum ValueExpected Val) { setFlag(Val, ValueMask); } - void setHiddenFlag(enum OptionHidden Val) { setFlag(Val, HiddenMask); } - void setFormattingFlag(enum FormattingFlags V) { setFlag(V, FormattingMask); } - void setMiscFlag(enum MiscFlags M) { setFlag(M, M); } + void setValueExpectedFlag(enum ValueExpected Val) { Value = Val; } + void setHiddenFlag(enum OptionHidden Val) { HiddenFlag = Val; } + void setFormattingFlag(enum FormattingFlags V) { Formatting = V; } + void setMiscFlag(enum MiscFlags M) { Misc |= M; } void setPosition(unsigned pos) { Position = pos; } protected: - explicit Option(unsigned DefaultFlags) - : NumOccurrences(0), Flags(DefaultFlags | NormalFormatting), Position(0), + explicit Option(enum NumOccurrencesFlag Occurrences, + enum OptionHidden Hidden) + : NumOccurrences(0), Occurrences(Occurrences), HiddenFlag(Hidden), + Formatting(NormalFormatting), Position(0), AdditionalVals(0), NextRegistered(0), ArgStr(""), HelpStr(""), ValueStr("") { - assert(getNumOccurrencesFlag() != 0 && - getOptionHiddenFlag() != 0 && "Not all default flags specified!"); } inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } @@ -326,6 +322,8 @@ LocationClass location(Ty &L) { return LocationClass(L); } struct GenericOptionValue { virtual ~GenericOptionValue() {} virtual bool compare(const GenericOptionValue &V) const = 0; +private: + virtual void anchor(); }; template struct OptionValue; @@ -339,7 +337,7 @@ struct OptionValueBase : public GenericOptionValue { bool hasValue() const { return false; } - const DataType &getValue() const { assert(false && "no default value"); } + const DataType &getValue() const { llvm_unreachable("no default value"); } // Some options may take their value from a different data type. template @@ -416,6 +414,8 @@ struct OptionValue : OptionValueCopy { setValue(V); return *this; } +private: + virtual void anchor(); }; template<> @@ -431,6 +431,8 @@ struct OptionValue : OptionValueCopy { setValue(V); return *this; } +private: + virtual void anchor(); }; //===----------------------------------------------------------------------===// @@ -1171,14 +1173,14 @@ class opt : public Option, // One option... template - explicit opt(const M0t &M0) : Option(Optional | NotHidden) { + explicit opt(const M0t &M0) : Option(Optional, NotHidden) { apply(M0, this); done(); } // Two options... 
template - opt(const M0t &M0, const M1t &M1) : Option(Optional | NotHidden) { + opt(const M0t &M0, const M1t &M1) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); done(); } @@ -1186,21 +1188,21 @@ class opt : public Option, // Three options... template opt(const M0t &M0, const M1t &M1, - const M2t &M2) : Option(Optional | NotHidden) { + const M2t &M2) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template opt(const M0t &M0, const M1t &M1, const M2t &M2, - const M3t &M3) : Option(Optional | NotHidden) { + const M3t &M3) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } // Five options... template opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4) : Option(Optional | NotHidden) { + const M4t &M4) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); done(); @@ -1209,7 +1211,7 @@ class opt : public Option, template opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4, const M5t &M5) : Option(Optional | NotHidden) { + const M4t &M4, const M5t &M5) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); done(); @@ -1219,7 +1221,7 @@ class opt : public Option, class M4t, class M5t, class M6t> opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, - const M6t &M6) : Option(Optional | NotHidden) { + const M6t &M6) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); done(); @@ -1229,7 +1231,7 @@ class opt : public Option, class M4t, class M5t, class M6t, class M7t> opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6, - const M7t &M7) : Option(Optional | NotHidden) { + const M7t &M7) : Option(Optional, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); done(); @@ -1338,34 +1340,34 @@ class list : public Option, public list_storage { // One option... template - explicit list(const M0t &M0) : Option(ZeroOrMore | NotHidden) { + explicit list(const M0t &M0) : Option(ZeroOrMore, NotHidden) { apply(M0, this); done(); } // Two options... template - list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) { + list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); done(); } // Three options... template list(const M0t &M0, const M1t &M1, const M2t &M2) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } // Five options... 
template list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4) : Option(ZeroOrMore | NotHidden) { + const M4t &M4) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); done(); @@ -1374,7 +1376,7 @@ class list : public Option, public list_storage { template list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) { + const M4t &M4, const M5t &M5) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); done(); @@ -1384,7 +1386,7 @@ class list : public Option, public list_storage { class M4t, class M5t, class M6t> list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); done(); @@ -1394,7 +1396,7 @@ class list : public Option, public list_storage { class M4t, class M5t, class M6t, class M7t> list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6, - const M7t &M7) : Option(ZeroOrMore | NotHidden) { + const M7t &M7) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); done(); @@ -1536,34 +1538,34 @@ class bits : public Option, public bits_storage { // One option... template - explicit bits(const M0t &M0) : Option(ZeroOrMore | NotHidden) { + explicit bits(const M0t &M0) : Option(ZeroOrMore, NotHidden) { apply(M0, this); done(); } // Two options... template - bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) { + bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); done(); } // Three options... template bits(const M0t &M0, const M1t &M1, const M2t &M2) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } // Five options... 
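// An illustrative sketch of how the cl::opt machinery above is normally used;
// the change in this hunk mostly affects how the occurrence/value/hidden
// flags are stored (bitfields instead of a packed integer), so this surface
// usage is unchanged.  The option name and overview string are made up.
#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool>
Verbose("verbose", llvm::cl::desc("Enable verbose output"),
        llvm::cl::init(false));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv, "flag storage demo\n");
  return Verbose ? 0 : 1;
}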
template bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4) : Option(ZeroOrMore | NotHidden) { + const M4t &M4) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); done(); @@ -1572,7 +1574,7 @@ class bits : public Option, public bits_storage { template bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, - const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) { + const M4t &M4, const M5t &M5) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); done(); @@ -1582,7 +1584,7 @@ class bits : public Option, public bits_storage { class M4t, class M5t, class M6t> bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6) - : Option(ZeroOrMore | NotHidden) { + : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); done(); @@ -1592,7 +1594,7 @@ class bits : public Option, public bits_storage { class M4t, class M5t, class M6t, class M7t> bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, const M4t &M4, const M5t &M5, const M6t &M6, - const M7t &M7) : Option(ZeroOrMore | NotHidden) { + const M7t &M7) : Option(ZeroOrMore, NotHidden) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); done(); @@ -1632,27 +1634,27 @@ class alias : public Option { // One option... template - explicit alias(const M0t &M0) : Option(Optional | Hidden), AliasFor(0) { + explicit alias(const M0t &M0) : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); done(); } // Two options... template - alias(const M0t &M0, const M1t &M1) : Option(Optional | Hidden), AliasFor(0) { + alias(const M0t &M0, const M1t &M1) : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); apply(M1, this); done(); } // Three options... template alias(const M0t &M0, const M1t &M1, const M2t &M2) - : Option(Optional | Hidden), AliasFor(0) { + : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); apply(M1, this); apply(M2, this); done(); } // Four options... template alias(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) - : Option(Optional | Hidden), AliasFor(0) { + : Option(Optional, Hidden), AliasFor(0) { apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); done(); } diff --git a/contrib/llvm/include/llvm/Support/Compiler.h b/contrib/llvm/include/llvm/Support/Compiler.h index e0921572182b..d0b186ea7c2b 100644 --- a/contrib/llvm/include/llvm/Support/Compiler.h +++ b/contrib/llvm/include/llvm/Support/Compiler.h @@ -49,16 +49,22 @@ #define LLVM_ATTRIBUTE_UNUSED #endif -#ifdef __GNUC__ // aka 'ATTRIBUTE_CONST' but following LLVM Conventions. -#define LLVM_ATTRIBUTE_READNONE __attribute__((__const__)) +#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__)) #else -#define LLVM_ATTRIBUTE_READNONE +#define LLVM_ATTRIBUTE_WEAK #endif -#ifdef __GNUC__ // aka 'ATTRIBUTE_PURE' but following LLVM Conventions. -#define LLVM_ATTRIBUTE_READONLY __attribute__((__pure__)) +#ifdef __GNUC__ // aka 'CONST' but following LLVM Conventions. +#define LLVM_READNONE __attribute__((__const__)) #else -#define LLVM_ATTRIBUTE_READONLY +#define LLVM_READNONE +#endif + +#ifdef __GNUC__ // aka 'PURE' but following LLVM Conventions. 
+#define LLVM_READONLY __attribute__((__pure__)) +#else +#define LLVM_READONLY #endif #if (__GNUC__ >= 4) @@ -67,6 +73,7 @@ #define BUILTIN_EXPECT(EXPR, VALUE) (EXPR) #endif + // C++ doesn't support 'extern template' of template specializations. GCC does, // but requires __extension__ before it. In the header, use this: // EXTERN_TEMPLATE_INSTANTIATION(class foo); @@ -111,6 +118,14 @@ #define LLVM_ATTRIBUTE_NORETURN #endif +// LLVM_EXTENSION - Support compilers where we have a keyword to suppress +// pedantic diagnostics. +#ifdef __GNUC__ +#define LLVM_EXTENSION __extension__ +#else +#define LLVM_EXTENSION +#endif + // LLVM_ATTRIBUTE_DEPRECATED(decl, "message") #if __has_feature(attribute_deprecated_with_message) # define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ diff --git a/contrib/llvm/include/llvm/Support/DOTGraphTraits.h b/contrib/llvm/include/llvm/Support/DOTGraphTraits.h index 3cb8164c3c3d..483f2674af7b 100644 --- a/contrib/llvm/include/llvm/Support/DOTGraphTraits.h +++ b/contrib/llvm/include/llvm/Support/DOTGraphTraits.h @@ -42,13 +42,13 @@ struct DefaultDOTGraphTraits { /// top of the graph. /// template - static std::string getGraphName(const GraphType& Graph) { return ""; } + static std::string getGraphName(const GraphType &) { return ""; } /// getGraphProperties - Return any custom properties that should be included /// in the top level graph structure for dot. /// template - static std::string getGraphProperties(const GraphType& Graph) { + static std::string getGraphProperties(const GraphType &) { return ""; } @@ -61,44 +61,44 @@ struct DefaultDOTGraphTraits { /// isNodeHidden - If the function returns true, the given node is not /// displayed in the graph. - static bool isNodeHidden(const void *Node) { + static bool isNodeHidden(const void *) { return false; } /// getNodeLabel - Given a node and a pointer to the top level graph, return /// the label to print in the node. template - std::string getNodeLabel(const void *Node, const GraphType& Graph) { + std::string getNodeLabel(const void *, const GraphType &) { return ""; } /// hasNodeAddressLabel - If this method returns true, the address of the node /// is added to the label of the node. template - static bool hasNodeAddressLabel(const void *Node, const GraphType& Graph) { + static bool hasNodeAddressLabel(const void *, const GraphType &) { return false; } /// If you want to specify custom node attributes, this is the place to do so /// template - static std::string getNodeAttributes(const void *Node, - const GraphType& Graph) { + static std::string getNodeAttributes(const void *, + const GraphType &) { return ""; } /// If you want to override the dot attributes printed for a particular edge, /// override this method. template - static std::string getEdgeAttributes(const void *Node, EdgeIter EI, - const GraphType& Graph) { + static std::string getEdgeAttributes(const void *, EdgeIter, + const GraphType &) { return ""; } /// getEdgeSourceLabel - If you want to label the edge source itself, /// implement this method. template - static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + static std::string getEdgeSourceLabel(const void *, EdgeIter) { return ""; } @@ -106,7 +106,7 @@ struct DefaultDOTGraphTraits { /// should actually target another edge source, not a node. If this method is /// implemented, getEdgeTarget should be implemented. 
template - static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + static bool edgeTargetsEdgeSource(const void *, EdgeIter) { return false; } @@ -114,7 +114,7 @@ struct DefaultDOTGraphTraits { /// called to determine which outgoing edge of Node is the target of this /// edge. template - static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + static EdgeIter getEdgeTarget(const void *, EdgeIter I) { return I; } @@ -126,13 +126,13 @@ struct DefaultDOTGraphTraits { /// numEdgeDestLabels - If hasEdgeDestLabels, this function returns the /// number of incoming edge labels the given node has. - static unsigned numEdgeDestLabels(const void *Node) { + static unsigned numEdgeDestLabels(const void *) { return 0; } /// getEdgeDestLabel - If hasEdgeDestLabels, this function returns the /// incoming edge label with the given index in the given node. - static std::string getEdgeDestLabel(const void *Node, unsigned i) { + static std::string getEdgeDestLabel(const void *, unsigned) { return ""; } @@ -143,7 +143,7 @@ struct DefaultDOTGraphTraits { /// it to add things to the output graph. /// template - static void addCustomGraphFeatures(const GraphType& Graph, GraphWriter &GW) {} + static void addCustomGraphFeatures(const GraphType &, GraphWriter &) {} }; diff --git a/contrib/llvm/include/llvm/Support/DataStream.h b/contrib/llvm/include/llvm/Support/DataStream.h new file mode 100644 index 000000000000..fedb0c925611 --- /dev/null +++ b/contrib/llvm/include/llvm/Support/DataStream.h @@ -0,0 +1,38 @@ +//===---- llvm/Support/DataStream.h - Lazy bitcode streaming ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines DataStreamer, which fetches bytes of data from +// a stream source. It provides support for streaming (lazy reading) of +// data, e.g. bitcode +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_SUPPORT_DATASTREAM_H_ +#define LLVM_SUPPORT_DATASTREAM_H_ + +#include + +namespace llvm { + +class DataStreamer { +public: + /// Fetch bytes [start-end) from the stream, and write them to the + /// buffer pointed to by buf. Returns the number of bytes actually written. 
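// A sketch of one possible DataStreamer implementation, reading from an
// in-memory string; BufferStreamer is a made-up name, not part of the
// imported sources.
#include "llvm/Support/DataStream.h"
#include <algorithm>
#include <cstring>
#include <string>

namespace {
class BufferStreamer : public llvm::DataStreamer {
  std::string Data;
  size_t Pos;
public:
  explicit BufferStreamer(const std::string &D) : Data(D), Pos(0) {}
  // Copy up to len bytes into buf and report how many were actually written.
  virtual size_t GetBytes(unsigned char *buf, size_t len) {
    size_t N = std::min(len, Data.size() - Pos);
    std::memcpy(buf, Data.data() + Pos, N);
    Pos += N;
    return N;
  }
};
}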
+ virtual size_t GetBytes(unsigned char *buf, size_t len) = 0; + + virtual ~DataStreamer(); +}; + +DataStreamer *getDataFileStreamer(const std::string &Filename, + std::string *Err); + +} + +#endif // LLVM_SUPPORT_DATASTREAM_H_ diff --git a/contrib/llvm/include/llvm/Support/DataTypes.h.in b/contrib/llvm/include/llvm/Support/DataTypes.h.in index 425805a1669b..b492bb14ba50 100644 --- a/contrib/llvm/include/llvm/Support/DataTypes.h.in +++ b/contrib/llvm/include/llvm/Support/DataTypes.h.in @@ -167,9 +167,24 @@ typedef signed int ssize_t; # define UINT64_C(C) C##ui64 #endif +#ifndef PRId64 +# define PRId64 "I64d" +#endif +#ifndef PRIi64 +# define PRIi64 "I64i" +#endif +#ifndef PRIo64 +# define PRIo64 "I64o" +#endif +#ifndef PRIu64 +# define PRIu64 "I64u" +#endif #ifndef PRIx64 # define PRIx64 "I64x" #endif +#ifndef PRIX64 +# define PRIX64 "I64X" +#endif #endif /* _MSC_VER */ diff --git a/contrib/llvm/include/llvm/Support/Debug.h b/contrib/llvm/include/llvm/Support/Debug.h index 8651fc1abea9..e72327271f23 100644 --- a/contrib/llvm/include/llvm/Support/Debug.h +++ b/contrib/llvm/include/llvm/Support/Debug.h @@ -35,14 +35,14 @@ class raw_ostream; #ifndef DEBUG_TYPE #define DEBUG_TYPE "" #endif - + #ifndef NDEBUG /// DebugFlag - This boolean is set to true if the '-debug' command line option /// is specified. This should probably not be referenced directly, instead, use /// the DEBUG macro below. /// extern bool DebugFlag; - + /// isCurrentDebugType - Return true if the specified string is the debug type /// specified on the command line, or if none was specified on the command line /// with the -debug-only=X option. @@ -54,7 +54,7 @@ bool isCurrentDebugType(const char *Type); /// debug output to be produced. /// void SetCurrentDebugType(const char *Type); - + /// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug /// information. In the '-debug' option is specified on the commandline, and if /// this is a debug build, then the code specified as the option to the macro diff --git a/contrib/llvm/include/llvm/Support/Dwarf.h b/contrib/llvm/include/llvm/Support/Dwarf.h index 30f91874db20..8f18a991a9e1 100644 --- a/contrib/llvm/include/llvm/Support/Dwarf.h +++ b/contrib/llvm/include/llvm/Support/Dwarf.h @@ -22,7 +22,8 @@ namespace llvm { // Debug info constants. enum { - LLVMDebugVersion = (11 << 16), // Current version of debug information. + LLVMDebugVersion = (12 << 16), // Current version of debug information. + LLVMDebugVersion11 = (11 << 16), // Constant for version 11. LLVMDebugVersion10 = (10 << 16), // Constant for version 10. LLVMDebugVersion9 = (9 << 16), // Constant for version 9. LLVMDebugVersion8 = (8 << 16), // Constant for version 8. 
@@ -130,6 +131,7 @@ enum dwarf_constants { DW_TAG_GNU_template_parameter_pack = 0x4107, DW_TAG_GNU_formal_parameter_pack = 0x4108, DW_TAG_lo_user = 0x4080, + DW_TAG_APPLE_property = 0x4200, DW_TAG_hi_user = 0xffff, // Children flag @@ -269,6 +271,7 @@ enum dwarf_constants { DW_AT_APPLE_property_setter = 0x3fea, DW_AT_APPLE_property_attribute = 0x3feb, DW_AT_APPLE_objc_complete_type = 0x3fec, + DW_AT_APPLE_property = 0x3fed, // Attribute form encodings DW_FORM_addr = 0x01, @@ -526,6 +529,7 @@ enum dwarf_constants { DW_LANG_D = 0x0013, DW_LANG_Python = 0x0014, DW_LANG_lo_user = 0x8000, + DW_LANG_Mips_Assembler = 0x8001, DW_LANG_hi_user = 0xffff, // Identifier case codes diff --git a/contrib/llvm/include/llvm/Support/DynamicLibrary.h b/contrib/llvm/include/llvm/Support/DynamicLibrary.h index 288936bc0b9b..0f59cbf23947 100644 --- a/contrib/llvm/include/llvm/Support/DynamicLibrary.h +++ b/contrib/llvm/include/llvm/Support/DynamicLibrary.h @@ -17,6 +17,9 @@ #include namespace llvm { + +class StringRef; + namespace sys { /// This class provides a portable interface to dynamic libraries which also diff --git a/contrib/llvm/include/llvm/Support/ELF.h b/contrib/llvm/include/llvm/Support/ELF.h index c5b85e2e6a12..04953b6e5657 100644 --- a/contrib/llvm/include/llvm/Support/ELF.h +++ b/contrib/llvm/include/llvm/Support/ELF.h @@ -599,7 +599,25 @@ enum { R_ARM_THM_TLS_DESCSEQ32 = 0x82 }; +// Mips Specific e_flags +enum { + EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions + EF_MIPS_PIC = 0x00000002, // Position independent code + EF_MIPS_CPIC = 0x00000004, // Call object with Position independent code + EF_MIPS_ARCH_1 = 0x00000000, // MIPS1 instruction set + EF_MIPS_ARCH_2 = 0x10000000, // MIPS2 instruction set + EF_MIPS_ARCH_3 = 0x20000000, // MIPS3 instruction set + EF_MIPS_ARCH_4 = 0x30000000, // MIPS4 instruction set + EF_MIPS_ARCH_5 = 0x40000000, // MIPS5 instruction set + EF_MIPS_ARCH_32 = 0x50000000, // MIPS32 instruction set per linux not elf.h + EF_MIPS_ARCH_64 = 0x60000000, // MIPS64 instruction set per linux not elf.h + EF_MIPS_ARCH_32R2 = 0x70000000, // mips32r2 + EF_MIPS_ARCH_64R2 = 0x80000000, // mips64r2 + EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant +}; + // ELF Relocation types for Mips +// . enum { R_MIPS_NONE = 0, R_MIPS_16 = 1, @@ -611,6 +629,7 @@ enum { R_MIPS_GPREL16 = 7, R_MIPS_LITERAL = 8, R_MIPS_GOT16 = 9, + R_MIPS_GOT = 9, R_MIPS_PC16 = 10, R_MIPS_CALL16 = 11, R_MIPS_GPREL32 = 12, @@ -717,6 +736,9 @@ enum { SHT_GROUP = 17, // Section group. SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries. SHT_LOOS = 0x60000000, // Lowest operating system-specific type. + SHT_GNU_verdef = 0x6ffffffd, // GNU version definitions. + SHT_GNU_verneed = 0x6ffffffe, // GNU version references. + SHT_GNU_versym = 0x6fffffff, // GNU symbol versions table. SHT_HIOS = 0x6fffffff, // Highest operating system-specific type. SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type. // Fixme: All this is duplicated in MCSectionELF. Why?? @@ -871,6 +893,7 @@ enum { STT_TLS = 6, // Thread local data object STT_LOOS = 7, // Lowest operating system-specific symbol type STT_HIOS = 8, // Highest operating system-specific symbol type + STT_GNU_IFUNC = 10, // GNU indirect function STT_LOPROC = 13, // Lowest processor-specific symbol type STT_HIPROC = 15 // Highest processor-specific symbol type }; @@ -1084,6 +1107,33 @@ enum { DF_STATIC_TLS = 0x10 // Reject attempts to load dynamically. 
}; +// ElfXX_VerDef structure version (GNU versioning) +enum { + VER_DEF_NONE = 0, + VER_DEF_CURRENT = 1 +}; + +// VerDef Flags (ElfXX_VerDef::vd_flags) +enum { + VER_FLG_BASE = 0x1, + VER_FLG_WEAK = 0x2, + VER_FLG_INFO = 0x4 +}; + +// Special constants for the version table. (SHT_GNU_versym/.gnu.version) +enum { + VER_NDX_LOCAL = 0, // Unversioned local symbol + VER_NDX_GLOBAL = 1, // Unversioned global symbol + VERSYM_VERSION = 0x7fff, // Version Index mask + VERSYM_HIDDEN = 0x8000 // Hidden bit (non-default version) +}; + +// ElfXX_VerNeed structure version (GNU versioning) +enum { + VER_NEED_NONE = 0, + VER_NEED_CURRENT = 1 +}; + } // end namespace ELF } // end namespace llvm diff --git a/contrib/llvm/include/llvm/Support/Endian.h b/contrib/llvm/include/llvm/Support/Endian.h index af1b506d6cf4..733ab7548fca 100644 --- a/contrib/llvm/include/llvm/Support/Endian.h +++ b/contrib/llvm/include/llvm/Support/Endian.h @@ -98,6 +98,9 @@ class packed_endian_specific_integral { operator value_type() const { return endian::read_le(Value); } + void operator=(value_type newValue) { + endian::write_le((void *)&Value, newValue); + } private: uint8_t Value[sizeof(value_type)]; }; @@ -108,6 +111,9 @@ class packed_endian_specific_integral { operator value_type() const { return endian::read_be(Value); } + void operator=(value_type newValue) { + endian::write_be((void *)&Value, newValue); + } private: uint8_t Value[sizeof(value_type)]; }; @@ -118,6 +124,9 @@ class packed_endian_specific_integral { operator value_type() const { return endian::read_le(&Value); } + void operator=(value_type newValue) { + endian::write_le((void *)&Value, newValue); + } private: value_type Value; }; @@ -128,6 +137,9 @@ class packed_endian_specific_integral { operator value_type() const { return endian::read_be(&Value); } + void operator=(value_type newValue) { + endian::write_be((void *)&Value, newValue); + } private: value_type Value; }; diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h index a868e5f9f70b..e6f9926af6f8 100644 --- a/contrib/llvm/include/llvm/Support/FileSystem.h +++ b/contrib/llvm/include/llvm/Support/FileSystem.h @@ -27,14 +27,21 @@ #ifndef LLVM_SUPPORT_FILE_SYSTEM_H #define LLVM_SUPPORT_FILE_SYSTEM_H +#include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/PathV1.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/system_error.h" #include #include +#include #include +#include + +#if HAVE_SYS_STAT_H +#include +#endif namespace llvm { namespace sys { @@ -91,7 +98,20 @@ struct space_info { /// a platform specific member to store the result. class file_status { - // implementation defined status field. + #if defined(LLVM_ON_UNIX) + dev_t st_dev; + ino_t st_ino; + #elif defined (LLVM_ON_WIN32) + uint32_t LastWriteTimeHigh; + uint32_t LastWriteTimeLow; + uint32_t VolumeSerialNumber; + uint32_t FileSizeHigh; + uint32_t FileSizeLow; + uint32_t FileIndexHigh; + uint32_t FileIndexLow; + #endif + friend bool equivalent(file_status A, file_status B); + friend error_code status(const Twine &path, file_status &result); file_type Type; public: explicit file_status(file_type v=file_type::status_error) @@ -101,6 +121,44 @@ class file_status void type(file_type v) { Type = v; } }; +/// file_magic - An "enum class" enumeration of file types based on magic (the first +/// N bytes of the file). 
+struct file_magic { + enum _ { + unknown = 0, ///< Unrecognized file + bitcode, ///< Bitcode file + archive, ///< ar style archive file + elf_relocatable, ///< ELF Relocatable object file + elf_executable, ///< ELF Executable image + elf_shared_object, ///< ELF dynamically linked shared lib + elf_core, ///< ELF core image + macho_object, ///< Mach-O Object file + macho_executable, ///< Mach-O Executable + macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM + macho_core, ///< Mach-O Core File + macho_preload_executabl, ///< Mach-O Preloaded Executable + macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib + macho_dynamic_linker, ///< The Mach-O dynamic linker + macho_bundle, ///< Mach-O Bundle file + macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub + macho_dsym_companion, ///< Mach-O dSYM companion file + coff_object, ///< COFF object file + pecoff_executable ///< PECOFF executable file + }; + + bool is_object() const { + return v_ == unknown ? false : true; + } + + file_magic() : v_(unknown) {} + file_magic(_ v) : v_(v) {} + explicit file_magic(int v) : v_(_(v)) {} + operator int() const {return v_;} + +private: + int v_; +}; + /// @} /// @name Physical Operators /// @{ @@ -241,6 +299,8 @@ bool equivalent(file_status A, file_status B); /// @brief Do paths represent the same thing? /// +/// assert(status_known(A) || status_known(B)); +/// /// @param A Input path A. /// @param B Input path B. /// @param result Set to true if stat(A) and stat(B) have the same device and @@ -397,13 +457,16 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result); error_code get_magic(const Twine &path, uint32_t len, SmallVectorImpl &result); +/// @brief Identify the type of a binary file based on how magical it is. +file_magic identify_magic(StringRef magic); + /// @brief Get and identify \a path's type based on its content. /// /// @param path Input path. /// @param result Set to the type of file, or LLVMFileType::Unknown_FileType. /// @results errc::success if result has been successfully set, otherwise a /// platform specific error_code. -error_code identify_magic(const Twine &path, LLVMFileType &result); +error_code identify_magic(const Twine &path, file_magic &result); /// @brief Get library paths the system linker uses. /// @@ -479,76 +542,171 @@ class directory_entry { bool operator>=(const directory_entry& rhs) const; }; +namespace detail { + struct DirIterState; + + error_code directory_iterator_construct(DirIterState&, StringRef); + error_code directory_iterator_increment(DirIterState&); + error_code directory_iterator_destruct(DirIterState&); + + /// DirIterState - Keeps state for the directory_iterator. It is reference + /// counted in order to preserve InputIterator semantics on copy. + struct DirIterState : public RefCountedBase { + DirIterState() + : IterationHandle(0) {} + + ~DirIterState() { + directory_iterator_destruct(*this); + } + + intptr_t IterationHandle; + directory_entry CurrentEntry; + }; +} + /// directory_iterator - Iterates through the entries in path. There is no /// operator++ because we need an error_code. If it's really needed we can make /// it call report_fatal_error on error. class directory_iterator { - intptr_t IterationHandle; - directory_entry CurrentEntry; - - // Platform implementations implement these functions to handle iteration. 
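// Illustrative use of the new file_magic interface above; the helper name is
// made up and error handling is kept minimal.
#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"

static bool isObjectFile(const llvm::Twine &Path) {
  llvm::sys::fs::file_magic Magic;
  if (llvm::sys::fs::identify_magic(Path, Magic))
    return false;                 // a set error_code means the query failed
  return Magic.is_object();
}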
- friend error_code directory_iterator_construct(directory_iterator &it, - StringRef path); - friend error_code directory_iterator_increment(directory_iterator &it); - friend error_code directory_iterator_destruct(directory_iterator &it); + IntrusiveRefCntPtr State; public: - explicit directory_iterator(const Twine &path, error_code &ec) - : IterationHandle(0) { + explicit directory_iterator(const Twine &path, error_code &ec) { + State = new detail::DirIterState; SmallString<128> path_storage; - ec = directory_iterator_construct(*this, path.toStringRef(path_storage)); + ec = detail::directory_iterator_construct(*State, + path.toStringRef(path_storage)); + } + + explicit directory_iterator(const directory_entry &de, error_code &ec) { + State = new detail::DirIterState; + ec = detail::directory_iterator_construct(*State, de.path()); } /// Construct end iterator. - directory_iterator() : IterationHandle(0) {} - - ~directory_iterator() { - directory_iterator_destruct(*this); - } + directory_iterator() : State(new detail::DirIterState) {} // No operator++ because we need error_code. directory_iterator &increment(error_code &ec) { - ec = directory_iterator_increment(*this); + ec = directory_iterator_increment(*State); return *this; } - const directory_entry &operator*() const { return CurrentEntry; } - const directory_entry *operator->() const { return &CurrentEntry; } + const directory_entry &operator*() const { return State->CurrentEntry; } + const directory_entry *operator->() const { return &State->CurrentEntry; } + + bool operator==(const directory_iterator &RHS) const { + return State->CurrentEntry == RHS.State->CurrentEntry; + } bool operator!=(const directory_iterator &RHS) const { - return CurrentEntry != RHS.CurrentEntry; + return !(*this == RHS); } // Other members as required by // C++ Std, 24.1.1 Input iterators [input.iterators] }; +namespace detail { + /// RecDirIterState - Keeps state for the recursive_directory_iterator. It is + /// reference counted in order to preserve InputIterator semantics on copy. + struct RecDirIterState : public RefCountedBase { + RecDirIterState() + : Level(0) + , HasNoPushRequest(false) {} + + std::stack > Stack; + uint16_t Level; + bool HasNoPushRequest; + }; +} + /// recursive_directory_iterator - Same as directory_iterator except for it /// recurses down into child directories. class recursive_directory_iterator { - uint16_t Level; - bool HasNoPushRequest; - // implementation directory iterator status + IntrusiveRefCntPtr State; public: - explicit recursive_directory_iterator(const Twine &path, error_code &ec); + recursive_directory_iterator() {} + explicit recursive_directory_iterator(const Twine &path, error_code &ec) + : State(new detail::RecDirIterState) { + State->Stack.push(directory_iterator(path, ec)); + if (State->Stack.top() == directory_iterator()) + State.reset(); + } // No operator++ because we need error_code. 
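// A minimal sketch of the intended iteration idiom (assumes the caller sits
// outside namespace llvm::sys::fs and has raw_ostream's outs() available;
// the directory name is a placeholder and error handling is abbreviated):
//
//   error_code EC;
//   for (sys::fs::recursive_directory_iterator I("some/dir", EC), E;
//        I != E && !EC; I.increment(EC))
//     outs() << I->path() << "\n";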
- directory_iterator &increment(error_code &ec); + recursive_directory_iterator &increment(error_code &ec) { + static const directory_iterator end_itr; - const directory_entry &operator*() const; - const directory_entry *operator->() const; + if (State->HasNoPushRequest) + State->HasNoPushRequest = false; + else { + file_status st; + if ((ec = State->Stack.top()->status(st))) return *this; + if (is_directory(st)) { + State->Stack.push(directory_iterator(*State->Stack.top(), ec)); + if (ec) return *this; + if (State->Stack.top() != end_itr) { + ++State->Level; + return *this; + } + State->Stack.pop(); + } + } + + while (!State->Stack.empty() + && State->Stack.top().increment(ec) == end_itr) { + State->Stack.pop(); + --State->Level; + } + + // Check if we are done. If so, create an end iterator. + if (State->Stack.empty()) + State.reset(); + + return *this; + } + + const directory_entry &operator*() const { return *State->Stack.top(); } + const directory_entry *operator->() const { return &*State->Stack.top(); } // observers - /// Gets the current level. path is at level 0. - int level() const; + /// Gets the current level. Starting path is at level 0. + int level() const { return State->Level; } + /// Returns true if no_push has been called for this directory_entry. - bool no_push_request() const; + bool no_push_request() const { return State->HasNoPushRequest; } // modifiers /// Goes up one level if Level > 0. - void pop(); - /// Does not go down into the current directory_entry. - void no_push(); + void pop() { + assert(State && "Cannot pop and end itertor!"); + assert(State->Level > 0 && "Cannot pop an iterator with level < 1"); + static const directory_iterator end_itr; + error_code ec; + do { + if (ec) + report_fatal_error("Error incrementing directory iterator."); + State->Stack.pop(); + --State->Level; + } while (!State->Stack.empty() + && State->Stack.top().increment(ec) == end_itr); + + // Check if we are done. If so, create an end iterator. + if (State->Stack.empty()) + State.reset(); + } + + /// Does not go down into the current directory_entry. + void no_push() { State->HasNoPushRequest = true; } + + bool operator==(const recursive_directory_iterator &RHS) const { + return State == RHS.State; + } + + bool operator!=(const recursive_directory_iterator &RHS) const { + return !(*this == RHS); + } // Other members as required by // C++ Std, 24.1.1 Input iterators [input.iterators] }; diff --git a/contrib/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm/include/llvm/Support/GraphWriter.h index eab0c9d18db1..ae32da59dc22 100644 --- a/contrib/llvm/include/llvm/Support/GraphWriter.h +++ b/contrib/llvm/include/llvm/Support/GraphWriter.h @@ -296,26 +296,26 @@ class GraphWriter { template raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G, bool ShortNames = false, - const std::string &Title = "") { + const Twine &Title = "") { // Start the graph emission process... GraphWriter W(O, G, ShortNames); // Emit the graph. 
- W.writeGraph(Title); + W.writeGraph(Title.str()); return O; } template -sys::Path WriteGraph(const GraphType &G, const std::string &Name, - bool ShortNames = false, const std::string &Title = "") { +sys::Path WriteGraph(const GraphType &G, const Twine &Name, + bool ShortNames = false, const Twine &Title = "") { std::string ErrMsg; sys::Path Filename = sys::Path::GetTemporaryDirectory(&ErrMsg); if (Filename.isEmpty()) { errs() << "Error: " << ErrMsg << "\n"; return Filename; } - Filename.appendComponent(Name + ".dot"); + Filename.appendComponent((Name + ".dot").str()); if (Filename.makeUnique(true,&ErrMsg)) { errs() << "Error: " << ErrMsg << "\n"; return sys::Path(); @@ -341,8 +341,8 @@ sys::Path WriteGraph(const GraphType &G, const std::string &Name, /// then cleanup. For use from the debugger. /// template -void ViewGraph(const GraphType &G, const std::string &Name, - bool ShortNames = false, const std::string &Title = "", +void ViewGraph(const GraphType &G, const Twine &Name, + bool ShortNames = false, const Twine &Title = "", GraphProgram::Name Program = GraphProgram::DOT) { sys::Path Filename = llvm::WriteGraph(G, Name, ShortNames, Title); diff --git a/contrib/llvm/include/llvm/Support/Host.h b/contrib/llvm/include/llvm/Support/Host.h index f77d4c1182bb..b33101632268 100644 --- a/contrib/llvm/include/llvm/Support/Host.h +++ b/contrib/llvm/include/llvm/Support/Host.h @@ -33,14 +33,14 @@ namespace sys { return !isLittleEndianHost(); } - /// getHostTriple() - Return the target triple of the running - /// system. + /// getDefaultTargetTriple() - Return the default target triple the compiler + /// has been configured to produce code for. /// /// The target triple is a string in the format of: /// CPU_TYPE-VENDOR-OPERATING_SYSTEM /// or /// CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM - std::string getHostTriple(); + std::string getDefaultTargetTriple(); /// getHostCPUName - Get the LLVM name for the host CPU. The particular format /// of the name is target dependent, and suitable for passing as -mcpu to the diff --git a/contrib/llvm/include/llvm/Support/IRReader.h b/contrib/llvm/include/llvm/Support/IRReader.h index 292c001e09f4..6d8a9b30ae1f 100644 --- a/contrib/llvm/include/llvm/Support/IRReader.h +++ b/contrib/llvm/include/llvm/Support/IRReader.h @@ -40,7 +40,8 @@ namespace llvm { std::string ErrMsg; Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg); if (M == 0) { - Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg); + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + ErrMsg); // ParseBitcodeFile does not take ownership of the Buffer in the // case of an error. delete Buffer; @@ -60,7 +61,7 @@ namespace llvm { LLVMContext &Context) { OwningPtr File; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { - Err = SMDiagnostic(Filename, + Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); return 0; } @@ -80,7 +81,8 @@ namespace llvm { std::string ErrMsg; Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg); if (M == 0) - Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg); + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + ErrMsg); // ParseBitcodeFile does not take ownership of the Buffer. 
delete Buffer; return M; @@ -97,7 +99,7 @@ namespace llvm { LLVMContext &Context) { OwningPtr File; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { - Err = SMDiagnostic(Filename, + Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); return 0; } diff --git a/contrib/llvm/include/llvm/Support/InstVisitor.h b/contrib/llvm/include/llvm/Support/InstVisitor.h index a661c4fac68d..52de8f660dd1 100644 --- a/contrib/llvm/include/llvm/Support/InstVisitor.h +++ b/contrib/llvm/include/llvm/Support/InstVisitor.h @@ -14,6 +14,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -157,54 +158,74 @@ class InstVisitor { // Specific Instruction type classes... note that all of the casts are // necessary because we use the instruction classes as opaque types... // - RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);} - RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);} - RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);} - RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);} - RetTy visitInvokeInst(InvokeInst &I) { DELEGATE(TerminatorInst);} - RetTy visitUnwindInst(UnwindInst &I) { DELEGATE(TerminatorInst);} - RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);} - RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);} - RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} - RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} - RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(Instruction); } - RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); } - RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction); } - RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I){ DELEGATE(Instruction); } - RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction); } - RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction); } - RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); } - RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction); } - RetTy visitTruncInst(TruncInst &I) { DELEGATE(CastInst); } - RetTy visitZExtInst(ZExtInst &I) { DELEGATE(CastInst); } - RetTy visitSExtInst(SExtInst &I) { DELEGATE(CastInst); } - RetTy visitFPTruncInst(FPTruncInst &I) { DELEGATE(CastInst); } - RetTy visitFPExtInst(FPExtInst &I) { DELEGATE(CastInst); } - RetTy visitFPToUIInst(FPToUIInst &I) { DELEGATE(CastInst); } - RetTy visitFPToSIInst(FPToSIInst &I) { DELEGATE(CastInst); } - RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst); } - RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst); } - RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst); } - RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst); } - RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst); } - RetTy visitSelectInst(SelectInst &I) { DELEGATE(Instruction); } - RetTy visitCallInst(CallInst &I) { DELEGATE(Instruction); } - RetTy visitVAArgInst(VAArgInst &I) { DELEGATE(Instruction); } + RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);} + RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);} + RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);} + RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);} + RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);} + RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);} 
+ RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} + RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} + RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);} + RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);} + RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);} + RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);} + RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);} + RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction);} + RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction);} + RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction);} + RetTy visitTruncInst(TruncInst &I) { DELEGATE(CastInst);} + RetTy visitZExtInst(ZExtInst &I) { DELEGATE(CastInst);} + RetTy visitSExtInst(SExtInst &I) { DELEGATE(CastInst);} + RetTy visitFPTruncInst(FPTruncInst &I) { DELEGATE(CastInst);} + RetTy visitFPExtInst(FPExtInst &I) { DELEGATE(CastInst);} + RetTy visitFPToUIInst(FPToUIInst &I) { DELEGATE(CastInst);} + RetTy visitFPToSIInst(FPToSIInst &I) { DELEGATE(CastInst);} + RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst);} + RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst);} + RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst);} + RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst);} + RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst);} + RetTy visitSelectInst(SelectInst &I) { DELEGATE(Instruction);} + RetTy visitVAArgInst(VAArgInst &I) { DELEGATE(UnaryInstruction);} RetTy visitExtractElementInst(ExtractElementInst &I) { DELEGATE(Instruction);} - RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction); } - RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction); } - RetTy visitExtractValueInst(ExtractValueInst &I) { DELEGATE(Instruction);} - RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); } - RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); } + RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction);} + RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction);} + RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);} + RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); } + RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); } + + // Call and Invoke are slightly different as they delegate first through + // a generic CallSite visitor. + RetTy visitCallInst(CallInst &I) { + return static_cast(this)->visitCallSite(&I); + } + RetTy visitInvokeInst(InvokeInst &I) { + return static_cast(this)->visitCallSite(&I); + } // Next level propagators: If the user does not overload a specific // instruction type, they can overload one of these to get the whole class // of instructions... 
// - RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction); } - RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction); } - RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction); } - RetTy visitCastInst(CastInst &I) { DELEGATE(Instruction); } + RetTy visitCastInst(CastInst &I) { DELEGATE(UnaryInstruction);} + RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction);} + RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction);} + RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction);} + RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);} + + // Provide a special visitor for a 'callsite' that visits both calls and + // invokes. When unimplemented, properly delegates to either the terminator or + // regular instruction visitor. + RetTy visitCallSite(CallSite CS) { + assert(CS); + Instruction &I = *CS.getInstruction(); + if (CS.isCall()) + DELEGATE(Instruction); + + assert(CS.isInvoke()); + DELEGATE(TerminatorInst); + } // If the user wants a 'default' case, they can choose to override this // function. If this function is not overloaded in the user's subclass, then diff --git a/contrib/llvm/include/llvm/Support/JSONParser.h b/contrib/llvm/include/llvm/Support/JSONParser.h new file mode 100644 index 000000000000..11149f1e47ba --- /dev/null +++ b/contrib/llvm/include/llvm/Support/JSONParser.h @@ -0,0 +1,448 @@ +//===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a JSON parser. +// +// See http://www.json.org/ for an overview. +// See http://www.ietf.org/rfc/rfc4627.txt for the full standard. +// +// FIXME: Currently this supports a subset of JSON. Specifically, support +// for numbers, booleans and null for values is missing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JSON_PARSER_H +#define LLVM_SUPPORT_JSON_PARSER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SourceMgr.h" + +namespace llvm { + +class JSONContainer; +class JSONString; +class JSONValue; +class JSONKeyValuePair; + +/// \brief Base class for a parsable JSON atom. +/// +/// This class has no semantics other than being a unit of JSON data which can +/// be parsed out of a JSON document. +class JSONAtom { +public: + /// \brief Possible types of JSON objects. + enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String }; + + /// \brief Returns the type of this value. + Kind getKind() const { return MyKind; } + + static bool classof(const JSONAtom *Atom) { return true; } + +protected: + JSONAtom(Kind MyKind) : MyKind(MyKind) {} + +private: + Kind MyKind; +}; + +/// \brief A parser for JSON text. +/// +/// Use an object of JSONParser to iterate over the values of a JSON text. +/// All objects are parsed during the iteration, so you can only iterate once +/// over the JSON text, but the cost of partial iteration is minimized. +/// Create a new JSONParser if you want to iterate multiple times. +class JSONParser { +public: + /// \brief Create a JSONParser for the given input. + /// + /// Parsing is started via parseRoot(). 
Access to the object returned from + /// parseRoot() will parse the input lazily. + JSONParser(StringRef Input, SourceMgr *SM); + + /// \brief Returns the outermost JSON value (either an array or an object). + /// + /// Can return NULL if the input does not start with an array or an object. + /// The object is not parsed yet - the caller must iterate over the + /// returned object to trigger parsing. + /// + /// A JSONValue can be either a JSONString, JSONObject or JSONArray. + JSONValue *parseRoot(); + + /// \brief Parses the JSON text and returns whether it is valid JSON. + /// + /// In case validate() return false, failed() will return true and + /// getErrorMessage() will return the parsing error. + bool validate(); + + /// \brief Returns true if an error occurs during parsing. + /// + /// If there was an error while parsing an object that was created by + /// iterating over the result of 'parseRoot', 'failed' will return true. + bool failed() const; + +private: + /// \brief These methods manage the implementation details of parsing new JSON + /// atoms. + /// @{ + JSONString *parseString(); + JSONValue *parseValue(); + JSONKeyValuePair *parseKeyValuePair(); + /// @} + + /// \brief Helpers to parse the elements out of both forms of containers. + /// @{ + const JSONAtom *parseElement(JSONAtom::Kind ContainerKind); + StringRef::iterator parseFirstElement(JSONAtom::Kind ContainerKind, + char StartChar, char EndChar, + const JSONAtom *&Element); + StringRef::iterator parseNextElement(JSONAtom::Kind ContainerKind, + char EndChar, + const JSONAtom *&Element); + /// @} + + /// \brief Whitespace parsing. + /// @{ + void nextNonWhitespace(); + bool isWhitespace(); + /// @} + + /// \brief These methods are used for error handling. + /// { + void setExpectedError(StringRef Expected, StringRef Found); + void setExpectedError(StringRef Expected, char Found); + bool errorIfAtEndOfFile(StringRef Message); + bool errorIfNotAt(char C, StringRef Message); + /// } + + /// \brief Skips all elements in the given container. + bool skipContainer(const JSONContainer &Container); + + /// \brief Skips to the next position behind the given JSON atom. + bool skip(const JSONAtom &Atom); + + /// All nodes are allocated by the parser and will be deallocated when the + /// parser is destroyed. + BumpPtrAllocator ValueAllocator; + + /// \brief The original input to the parser. + MemoryBuffer *InputBuffer; + + /// \brief The source manager used for diagnostics and buffer management. + SourceMgr *SM; + + /// \brief The current position in the parse stream. + StringRef::iterator Position; + + /// \brief The end position for fast EOF checks without introducing + /// unnecessary dereferences. + StringRef::iterator End; + + /// \brief If true, an error has occurred. + bool Failed; + + friend class JSONContainer; +}; + + +/// \brief Base class for JSON value objects. +/// +/// This object represents an abstract JSON value. It is the root node behind +/// the group of JSON entities that can represent top-level values in a JSON +/// document. It has no API, and is just a placeholder in the type hierarchy of +/// nodes. 
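// A hedged illustration of the lazy-parsing contract described above (the
// input literal is a placeholder, and only string values are shown because the
// parser does not yet support numbers or booleans):
//
//   SourceMgr SM;
//   JSONParser Parser("[\"a\", \"b\"]", &SM);
//   if (JSONArray *A = dyn_cast_or_null<JSONArray>(Parser.parseRoot()))
//     for (JSONArray::const_iterator I = A->begin(), E = A->end(); I != E; ++I)
//       if (const JSONString *S = dyn_cast<JSONString>(*I))
//         outs() << S->getRawText() << "\n";
//   if (Parser.failed())
//     errs() << "malformed JSON\n";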
+class JSONValue : public JSONAtom { +protected: + JSONValue(Kind MyKind) : JSONAtom(MyKind) {} + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + switch (Atom->getKind()) { + case JK_Array: + case JK_Object: + case JK_String: + return true; + case JK_KeyValuePair: + return false; + } + llvm_unreachable("Invalid JSONAtom kind"); + } + static bool classof(const JSONValue *Value) { return true; } + ///@} +}; + +/// \brief Gives access to the text of a JSON string. +/// +/// FIXME: Implement a method to return the unescaped text. +class JSONString : public JSONValue { +public: + /// \brief Returns the underlying parsed text of the string. + /// + /// This is the unescaped content of the JSON text. + /// See http://www.ietf.org/rfc/rfc4627.txt for details. + StringRef getRawText() const { return RawText; } + +private: + JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {} + + StringRef RawText; + + friend class JSONParser; + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == JK_String; + } + static bool classof(const JSONString *String) { return true; } + ///@} +}; + +/// \brief A (key, value) tuple of type (JSONString *, JSONValue *). +/// +/// Note that JSONKeyValuePair is not a JSONValue, it is a bare JSONAtom. +/// JSONKeyValuePairs can be elements of a JSONObject, but not of a JSONArray. +/// They are not viable as top-level values either. +class JSONKeyValuePair : public JSONAtom { +public: + const JSONString * const Key; + const JSONValue * const Value; + +private: + JSONKeyValuePair(const JSONString *Key, const JSONValue *Value) + : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {} + + friend class JSONParser; + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == JK_KeyValuePair; + } + static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; } + ///@} +}; + +/// \brief Implementation of JSON containers (arrays and objects). +/// +/// JSONContainers drive the lazy parsing of JSON arrays and objects via +/// forward iterators. +class JSONContainer : public JSONValue { +private: + /// \brief An iterator that parses the underlying container during iteration. + /// + /// Iterators on the same collection use shared state, so when multiple copies + /// of an iterator exist, only one is allowed to be used for iteration; + /// iterating multiple copies of an iterator of the same collection will lead + /// to undefined behavior. + class AtomIterator { + public: + AtomIterator(const AtomIterator &I) : Container(I.Container) {} + + /// \brief Iterator interface. + ///@{ + bool operator==(const AtomIterator &I) const { + if (isEnd() || I.isEnd()) + return isEnd() == I.isEnd(); + return Container->Position == I.Container->Position; + } + bool operator!=(const AtomIterator &I) const { + return !(*this == I); + } + AtomIterator &operator++() { + Container->parseNextElement(); + return *this; + } + const JSONAtom *operator*() { + return Container->Current; + } + ///@} + + private: + /// \brief Create an iterator for which 'isEnd' returns true. + AtomIterator() : Container(0) {} + + /// \brief Create an iterator for the given container. 
+ AtomIterator(const JSONContainer *Container) : Container(Container) {} + + bool isEnd() const { + return Container == 0 || Container->Position == StringRef::iterator(); + } + + const JSONContainer * const Container; + + friend class JSONContainer; + }; + +protected: + /// \brief An iterator for the specified AtomT. + /// + /// Used for the implementation of iterators for JSONArray and JSONObject. + template + class IteratorTemplate : public std::iterator { + public: + explicit IteratorTemplate(const AtomIterator& AtomI) + : AtomI(AtomI) {} + + bool operator==(const IteratorTemplate &I) const { + return AtomI == I.AtomI; + } + bool operator!=(const IteratorTemplate &I) const { return !(*this == I); } + + IteratorTemplate &operator++() { + ++AtomI; + return *this; + } + + const AtomT *operator*() { return dyn_cast(*AtomI); } + + private: + AtomIterator AtomI; + }; + + JSONContainer(JSONParser *Parser, char StartChar, char EndChar, + JSONAtom::Kind ContainerKind) + : JSONValue(ContainerKind), Parser(Parser), + Position(), Current(0), Started(false), + StartChar(StartChar), EndChar(EndChar) {} + + /// \brief Returns a lazy parsing iterator over the container. + /// + /// As the iterator drives the parse stream, begin() must only be called + /// once per container. + AtomIterator atom_begin() const { + if (Started) + report_fatal_error("Cannot parse container twice."); + Started = true; + // Set up the position and current element when we begin iterating over the + // container. + Position = Parser->parseFirstElement(getKind(), StartChar, EndChar, Current); + return AtomIterator(this); + } + AtomIterator atom_end() const { + return AtomIterator(); + } + +private: + AtomIterator atom_current() const { + if (!Started) + return atom_begin(); + + return AtomIterator(this); + } + + /// \brief Parse the next element in the container into the Current element. + /// + /// This routine is called as an iterator into this container walks through + /// its elements. It mutates the container's internal current node to point to + /// the next atom of the container. + void parseNextElement() const { + Parser->skip(*Current); + Position = Parser->parseNextElement(getKind(), EndChar, Current); + } + + // For parsing, JSONContainers call back into the JSONParser. + JSONParser * const Parser; + + // 'Position', 'Current' and 'Started' store the state of the parse stream + // for iterators on the container, they don't change the container's elements + // and are thus marked as mutable. + mutable StringRef::iterator Position; + mutable const JSONAtom *Current; + mutable bool Started; + + const char StartChar; + const char EndChar; + + friend class JSONParser; + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + switch (Atom->getKind()) { + case JK_Array: + case JK_Object: + return true; + case JK_KeyValuePair: + case JK_String: + return false; + } + llvm_unreachable("Invalid JSONAtom kind"); + } + static bool classof(const JSONContainer *Container) { return true; } + ///@} +}; + +/// \brief A simple JSON array. +class JSONArray : public JSONContainer { +public: + typedef IteratorTemplate const_iterator; + + /// \brief Returns a lazy parsing iterator over the container. + /// + /// As the iterator drives the parse stream, begin() must only be called + /// once per container. 
+ const_iterator begin() const { return const_iterator(atom_begin()); } + const_iterator end() const { return const_iterator(atom_end()); } + +private: + JSONArray(JSONParser *Parser) + : JSONContainer(Parser, '[', ']', JSONAtom::JK_Array) {} + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == JSONAtom::JK_Array; + } + static bool classof(const JSONArray *Array) { return true; } + ///@} + + friend class JSONParser; +}; + +/// \brief A JSON object: an iterable list of JSON key-value pairs. +class JSONObject : public JSONContainer { +public: + typedef IteratorTemplate const_iterator; + + /// \brief Returns a lazy parsing iterator over the container. + /// + /// As the iterator drives the parse stream, begin() must only be called + /// once per container. + const_iterator begin() const { return const_iterator(atom_begin()); } + const_iterator end() const { return const_iterator(atom_end()); } + +private: + JSONObject(JSONParser *Parser) + : JSONContainer(Parser, '{', '}', JSONAtom::JK_Object) {} + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == JSONAtom::JK_Object; + } + static bool classof(const JSONObject *Object) { return true; } + ///@} + + friend class JSONParser; +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_JSON_PARSER_H diff --git a/contrib/llvm/include/llvm/Support/LockFileManager.h b/contrib/llvm/include/llvm/Support/LockFileManager.h new file mode 100644 index 000000000000..e2fa8ebc56e4 --- /dev/null +++ b/contrib/llvm/include/llvm/Support/LockFileManager.h @@ -0,0 +1,74 @@ +//===--- LockFileManager.h - File-level locking utility ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_SUPPORT_LOCKFILEMANAGER_H +#define LLVM_SUPPORT_LOCKFILEMANAGER_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/system_error.h" +#include // for std::pair + +namespace llvm { + +/// \brief Class that manages the creation of a lock file to aid +/// implicit coordination between different processes. +/// +/// The implicit coordination works by creating a ".lock" file alongside +/// the file that we're coordinating for, using the atomicity of the file +/// system to ensure that only a single process can create that ".lock" file. +/// When the lock file is removed, the owning process has finished the +/// operation. +class LockFileManager { +public: + /// \brief Describes the state of a lock file. + enum LockFileState { + /// \brief The lock file has been created and is owned by this instance + /// of the object. + LFS_Owned, + /// \brief The lock file already exists and is owned by some other + /// instance. + LFS_Shared, + /// \brief An error occurred while trying to create or find the lock + /// file. 
+ LFS_Error + }; + +private: + SmallString<128> LockFileName; + SmallString<128> UniqueLockFileName; + + Optional > Owner; + Optional Error; + + LockFileManager(const LockFileManager &); + LockFileManager &operator=(const LockFileManager &); + + static Optional > + readLockFile(StringRef LockFileName); + + static bool processStillExecuting(StringRef Hostname, int PID); + +public: + + LockFileManager(StringRef FileName); + ~LockFileManager(); + + /// \brief Determine the state of the lock file. + LockFileState getState() const; + + operator LockFileState() const { return getState(); } + + /// \brief For a shared lock, wait until the owner releases the lock. + void waitForUnlock(); +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_LOCKFILEMANAGER_H diff --git a/contrib/llvm/include/llvm/Support/MachO.h b/contrib/llvm/include/llvm/Support/MachO.h index 5b6858613ff6..44a7a791c522 100644 --- a/contrib/llvm/include/llvm/Support/MachO.h +++ b/contrib/llvm/include/llvm/Support/MachO.h @@ -114,6 +114,10 @@ namespace llvm { LoadCommandVersionMinIPhoneOS = 0x00000025u, // LC_VERSION_MIN_IPHONEOS LoadCommandFunctionStarts = 0x00000026u, // LC_FUNCTION_STARTS LoadCommandDyldEnvironment = 0x00000027u, // LC_DYLD_ENVIRONMENT + LoadCommandMain = 0x80000028u, // LC_MAIN + LoadCommandDataInCode = 0x00000029u, // LC_DATA_IN_CODE + LoadCommandSourceVersion = 0x0000002Au, // LC_SOURCE_VERSION + LoadCommandCodeSignDRs = 0x0000002Bu, // LC_DYLIB_CODE_SIGN_DRS // Constant bits for the "flags" field in llvm::MachO::segment_command SegmentCommandFlagBitHighVM = 0x1u, // SG_HIGHVM @@ -240,6 +244,9 @@ namespace llvm { NListSectionNoSection = 0u, // NO_SECT NListSectionMaxSection = 0xffu, // MAX_SECT + NListDescWeakRef = 0x40u, + NListDescWeakDef = 0x80u, + // Constant values for the "n_type" field in llvm::MachO::nlist and // llvm::MachO::nlist_64 when "(n_type & NlistMaskStab) != 0" StabGlobalSymbol = 0x20u, // N_GSYM diff --git a/contrib/llvm/include/llvm/Support/ManagedStatic.h b/contrib/llvm/include/llvm/Support/ManagedStatic.h index 53e73ad35f49..4171d1bec8dc 100644 --- a/contrib/llvm/include/llvm/Support/ManagedStatic.h +++ b/contrib/llvm/include/llvm/Support/ManagedStatic.h @@ -16,6 +16,7 @@ #include "llvm/Support/Atomic.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/Valgrind.h" namespace llvm { @@ -65,6 +66,7 @@ class ManagedStatic : public ManagedStaticBase { void* tmp = Ptr; if (llvm_is_multithreaded()) sys::MemoryFence(); if (!tmp) RegisterManagedStatic(object_creator, object_deleter::call); + TsanHappensAfter(this); return *static_cast(Ptr); } @@ -72,6 +74,7 @@ class ManagedStatic : public ManagedStaticBase { void* tmp = Ptr; if (llvm_is_multithreaded()) sys::MemoryFence(); if (!tmp) RegisterManagedStatic(object_creator, object_deleter::call); + TsanHappensAfter(this); return static_cast(Ptr); } @@ -79,6 +82,7 @@ class ManagedStatic : public ManagedStaticBase { void* tmp = Ptr; if (llvm_is_multithreaded()) sys::MemoryFence(); if (!tmp) RegisterManagedStatic(object_creator, object_deleter::call); + TsanHappensAfter(this); return *static_cast(Ptr); } @@ -86,6 +90,7 @@ class ManagedStatic : public ManagedStaticBase { void* tmp = Ptr; if (llvm_is_multithreaded()) sys::MemoryFence(); if (!tmp) RegisterManagedStatic(object_creator, object_deleter::call); + TsanHappensAfter(this); return static_cast(Ptr); } diff --git a/contrib/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm/include/llvm/Support/MathExtras.h index 4627557f7f1f..d085c94f2adc 100644 --- 
a/contrib/llvm/include/llvm/Support/MathExtras.h +++ b/contrib/llvm/include/llvm/Support/MathExtras.h @@ -51,6 +51,13 @@ inline bool isInt<32>(int64_t x) { return static_cast(x) == x; } +/// isShiftedInt - Checks if a signed integer is an N bit number shifted +/// left by S. +template +inline bool isShiftedInt(int64_t x) { + return isInt(x) && (x % (1< inline bool isUInt(uint64_t x) { @@ -70,6 +77,13 @@ inline bool isUInt<32>(uint64_t x) { return static_cast(x) == x; } +/// isShiftedUInt - Checks if a unsigned integer is an N bit number shifted +/// left by S. +template +inline bool isShiftedUInt(uint64_t x) { + return isUInt(x) && (x % (1<getValue(); return true; } + // FIXME: Remove this. if (ConstantVector *CV = dyn_cast(V)) if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) { Res = &CI->getValue(); return true; } + if (ConstantDataVector *CV = dyn_cast(V)) + if (ConstantInt *CI = + dyn_cast_or_null(CV->getSplatValue())) { + Res = &CI->getValue(); + return true; + } return false; } }; @@ -143,9 +151,13 @@ struct cst_pred_ty : public Predicate { bool match(ITy *V) { if (const ConstantInt *CI = dyn_cast(V)) return this->isValue(CI->getValue()); + // FIXME: Remove this. if (const ConstantVector *CV = dyn_cast(V)) if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) return this->isValue(CI->getValue()); + if (const ConstantDataVector *CV = dyn_cast(V)) + if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) + return this->isValue(CI->getValue()); return false; } }; @@ -163,12 +175,22 @@ struct api_pred_ty : public Predicate { Res = &CI->getValue(); return true; } + + // FIXME: remove. if (const ConstantVector *CV = dyn_cast(V)) if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) if (this->isValue(CI->getValue())) { Res = &CI->getValue(); return true; } + + if (const ConstantDataVector *CV = dyn_cast(V)) + if (ConstantInt *CI = dyn_cast_or_null(CV->getSplatValue())) + if (this->isValue(CI->getValue())) { + Res = &CI->getValue(); + return true; + } + return false; } }; @@ -440,6 +462,26 @@ m_IDiv(const LHS &L, const RHS &R) { return BinOp2_match(L, R); } +//===----------------------------------------------------------------------===// +// Class that matches exact binary ops. 
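// For instance, once the m_Exact matcher below is available, one might write
// (a sketch using the existing m_SDiv/m_Value/m_ConstantInt helpers from this
// header):
//
//   Value *X; ConstantInt *C;
//   if (match(V, m_Exact(m_SDiv(m_Value(X), m_ConstantInt(C)))))
//     ... // V is an "sdiv exact" of X by the constant C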
+// +template +struct Exact_match { + SubPattern_t SubPattern; + + Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} + + template + bool match(OpTy *V) { + if (PossiblyExactOperator *PEO = dyn_cast(V)) + return PEO->isExact() && SubPattern.match(V); + return false; + } +}; + +template +inline Exact_match m_Exact(const T &SubPattern) { return SubPattern; } + //===----------------------------------------------------------------------===// // Matchers for CmpInst classes // @@ -529,10 +571,8 @@ struct CastClass_match { template bool match(OpTy *V) { - if (CastInst *I = dyn_cast(V)) - return I->getOpcode() == Opcode && Op.match(I->getOperand(0)); - if (ConstantExpr *CE = dyn_cast(V)) - return CE->getOpcode() == Opcode && Op.match(CE->getOperand(0)); + if (Operator *O = dyn_cast(V)) + return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); return false; } }; @@ -585,21 +625,18 @@ struct not_match { template bool match(OpTy *V) { - if (Instruction *I = dyn_cast(V)) - if (I->getOpcode() == Instruction::Xor) - return matchIfNot(I->getOperand(0), I->getOperand(1)); - if (ConstantExpr *CE = dyn_cast(V)) - if (CE->getOpcode() == Instruction::Xor) - return matchIfNot(CE->getOperand(0), CE->getOperand(1)); + if (Operator *O = dyn_cast(V)) + if (O->getOpcode() == Instruction::Xor) + return matchIfNot(O->getOperand(0), O->getOperand(1)); return false; } private: bool matchIfNot(Value *LHS, Value *RHS) { - if (ConstantInt *CI = dyn_cast(RHS)) - return CI->isAllOnesValue() && L.match(LHS); - if (ConstantVector *CV = dyn_cast(RHS)) - return CV->isAllOnesValue() && L.match(LHS); - return false; + return (isa(RHS) || isa(RHS) || + // FIXME: Remove CV. + isa(RHS)) && + cast(RHS)->isAllOnesValue() && + L.match(LHS); } }; @@ -615,19 +652,16 @@ struct neg_match { template bool match(OpTy *V) { - if (Instruction *I = dyn_cast(V)) - if (I->getOpcode() == Instruction::Sub) - return matchIfNeg(I->getOperand(0), I->getOperand(1)); - if (ConstantExpr *CE = dyn_cast(V)) - if (CE->getOpcode() == Instruction::Sub) - return matchIfNeg(CE->getOperand(0), CE->getOperand(1)); + if (Operator *O = dyn_cast(V)) + if (O->getOpcode() == Instruction::Sub) + return matchIfNeg(O->getOperand(0), O->getOperand(1)); return false; } private: bool matchIfNeg(Value *LHS, Value *RHS) { - if (ConstantInt *C = dyn_cast(LHS)) - return C->isZero() && L.match(RHS); - return false; + return ((isa(LHS) && cast(LHS)->isZero()) || + isa(LHS)) && + L.match(RHS); } }; @@ -644,12 +678,9 @@ struct fneg_match { template bool match(OpTy *V) { - if (Instruction *I = dyn_cast(V)) - if (I->getOpcode() == Instruction::FSub) - return matchIfFNeg(I->getOperand(0), I->getOperand(1)); - if (ConstantExpr *CE = dyn_cast(V)) - if (CE->getOpcode() == Instruction::FSub) - return matchIfFNeg(CE->getOperand(0), CE->getOperand(1)); + if (Operator *O = dyn_cast(V)) + if (O->getOpcode() == Instruction::FSub) + return matchIfFNeg(O->getOperand(0), O->getOperand(1)); return false; } private: diff --git a/contrib/llvm/include/llvm/Support/Process.h b/contrib/llvm/include/llvm/Support/Process.h index 27ef2670093a..33799229ff35 100644 --- a/contrib/llvm/include/llvm/Support/Process.h +++ b/contrib/llvm/include/llvm/Support/Process.h @@ -138,9 +138,6 @@ namespace sys { /// Resets the terminals colors, or returns an escape sequence to do so. static const char *ResetColor(); - - /// Change the program working directory to that given by \arg Path. 
- static void SetWorkingDirectory(std::string Path); /// @} }; } diff --git a/contrib/llvm/include/llvm/Support/Program.h b/contrib/llvm/include/llvm/Support/Program.h index a5026573aa94..a85f23550ec8 100644 --- a/contrib/llvm/include/llvm/Support/Program.h +++ b/contrib/llvm/include/llvm/Support/Program.h @@ -17,6 +17,7 @@ #include "llvm/Support/Path.h" namespace llvm { +class error_code; namespace sys { // TODO: Add operations to communicate with the process, redirect its I/O, @@ -122,12 +123,12 @@ namespace sys { /// @brief Construct a Program by finding it by name. static Path FindProgramByName(const std::string& name); - // These methods change the specified standard stream (stdin, - // stdout, or stderr) to binary mode. They return true if an error - // occurred - static bool ChangeStdinToBinary(); - static bool ChangeStdoutToBinary(); - static bool ChangeStderrToBinary(); + // These methods change the specified standard stream (stdin, stdout, or + // stderr) to binary mode. They return errc::success if the specified stream + // was changed. Otherwise a platform dependent error is returned. + static error_code ChangeStdinToBinary(); + static error_code ChangeStdoutToBinary(); + static error_code ChangeStderrToBinary(); /// A convenience function equivalent to Program prg; prg.Execute(..); /// prg.Wait(..); diff --git a/contrib/llvm/include/llvm/Support/Recycler.h b/contrib/llvm/include/llvm/Support/Recycler.h index d8f8c7894142..fa6e189e97bd 100644 --- a/contrib/llvm/include/llvm/Support/Recycler.h +++ b/contrib/llvm/include/llvm/Support/Recycler.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ilist.h" #include "llvm/Support/AlignOf.h" +#include "llvm/Support/ErrorHandling.h" #include namespace llvm { @@ -52,7 +53,7 @@ struct ilist_traits : static void noteHead(RecyclerStruct*, RecyclerStruct*) {} static void deleteNode(RecyclerStruct *) { - assert(0 && "Recycler's ilist_traits shouldn't see a deleteNode call!"); + llvm_unreachable("Recycler's ilist_traits shouldn't see a deleteNode call!"); } }; diff --git a/contrib/llvm/include/llvm/Support/SMLoc.h b/contrib/llvm/include/llvm/Support/SMLoc.h index 02db32794b6d..d48bfcc30c5b 100644 --- a/contrib/llvm/include/llvm/Support/SMLoc.h +++ b/contrib/llvm/include/llvm/Support/SMLoc.h @@ -15,9 +15,11 @@ #ifndef SUPPORT_SMLOC_H #define SUPPORT_SMLOC_H +#include + namespace llvm { -// SMLoc - Represents a location in source code. +/// SMLoc - Represents a location in source code. class SMLoc { const char *Ptr; public: @@ -38,7 +40,23 @@ class SMLoc { } }; -} +/// SMRange - Represents a range in source code. Note that unlike standard STL +/// ranges, the locations specified are considered to be *inclusive*. For +/// example, [X,X] *does* include X, it isn't an empty range. 
+class SMRange { +public: + SMLoc Start, End; + + SMRange() {} + SMRange(SMLoc Start, SMLoc End) : Start(Start), End(End) { + assert(Start.isValid() == End.isValid() && + "Start and end should either both be valid or both be invalid!"); + } + + bool isValid() const { return Start.isValid(); } +}; + +} // end namespace llvm #endif diff --git a/contrib/llvm/tools/clang/include/clang/Analysis/Support/SaveAndRestore.h b/contrib/llvm/include/llvm/Support/SaveAndRestore.h similarity index 92% rename from contrib/llvm/tools/clang/include/clang/Analysis/Support/SaveAndRestore.h rename to contrib/llvm/include/llvm/Support/SaveAndRestore.h index f720639490d9..ffa99b968d3c 100644 --- a/contrib/llvm/tools/clang/include/clang/Analysis/Support/SaveAndRestore.h +++ b/contrib/llvm/include/llvm/Support/SaveAndRestore.h @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_SAVERESTORE -#define LLVM_CLANG_ANALYSIS_SAVERESTORE +#ifndef LLVM_ADT_SAVERESTORE +#define LLVM_ADT_SAVERESTORE -namespace clang { +namespace llvm { // SaveAndRestore - A utility class that uses RAII to save and restore // the value of a variable. diff --git a/contrib/llvm/include/llvm/Support/SourceMgr.h b/contrib/llvm/include/llvm/Support/SourceMgr.h index deb8cafa06d2..58b8fab52402 100644 --- a/contrib/llvm/include/llvm/Support/SourceMgr.h +++ b/contrib/llvm/include/llvm/Support/SourceMgr.h @@ -17,10 +17,8 @@ #define SUPPORT_SOURCEMGR_H #include "llvm/Support/SMLoc.h" - +#include "llvm/ADT/ArrayRef.h" #include -#include -#include namespace llvm { class MemoryBuffer; @@ -33,10 +31,16 @@ namespace llvm { /// and handles diagnostic wrangling. class SourceMgr { public: + enum DiagKind { + DK_Error, + DK_Warning, + DK_Note + }; + /// DiagHandlerTy - Clients that want to handle their own diagnostics in a /// custom way can register a function pointer+context as a diagnostic /// handler. It gets called each time PrintMessage is invoked. - typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context); + typedef void (*DiagHandlerTy)(const SMDiagnostic &, void *Context); private: struct SrcBuffer { /// Buffer - The memory buffer for the file. @@ -124,11 +128,8 @@ class SourceMgr { /// PrintMessage - Emit a message about the specified location with the /// specified string. /// - /// @param Type - If non-null, the kind of message (e.g., "error") which is - /// prefixed to the message. - /// @param ShowLine - Should the diagnostic show the source line. - void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type, - bool ShowLine = true) const; + void PrintMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg, + ArrayRef Ranges = ArrayRef()) const; /// GetMessage - Return an SMDiagnostic at the specified location with the @@ -136,10 +137,8 @@ class SourceMgr { /// /// @param Type - If non-null, the kind of message (e.g., "error") which is /// prefixed to the message. - /// @param ShowLine - Should the diagnostic show the source line. - SMDiagnostic GetMessage(SMLoc Loc, - const Twine &Msg, const char *Type, - bool ShowLine = true) const; + SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg, + ArrayRef Ranges = ArrayRef()) const; /// PrintIncludeStack - Prints the names of included files and the line of the /// file they were included from. 
A diagnostic handler can use this before @@ -158,35 +157,38 @@ class SMDiagnostic { SMLoc Loc; std::string Filename; int LineNo, ColumnNo; + SourceMgr::DiagKind Kind; std::string Message, LineContents; - unsigned ShowLine : 1; + std::vector > Ranges; public: // Null diagnostic. - SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {} + SMDiagnostic() + : SM(0), LineNo(0), ColumnNo(0), Kind(SourceMgr::DK_Error) {} // Diagnostic with no location (e.g. file not found, command line arg error). - SMDiagnostic(const std::string &filename, const std::string &Msg) - : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1), - Message(Msg), ShowLine(false) {} + SMDiagnostic(const std::string &filename, SourceMgr::DiagKind Kind, + const std::string &Msg) + : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Kind), + Message(Msg) {} // Diagnostic with a location. SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, - int Line, int Col, + int Line, int Col, SourceMgr::DiagKind Kind, const std::string &Msg, const std::string &LineStr, - bool showline = true) - : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), - LineContents(LineStr), ShowLine(showline) {} + ArrayRef > Ranges); const SourceMgr *getSourceMgr() const { return SM; } SMLoc getLoc() const { return Loc; } const std::string &getFilename() const { return Filename; } int getLineNo() const { return LineNo; } int getColumnNo() const { return ColumnNo; } + SourceMgr::DiagKind getKind() const { return Kind; } const std::string &getMessage() const { return Message; } const std::string &getLineContents() const { return LineContents; } - bool getShowLine() const { return ShowLine; } - - void Print(const char *ProgName, raw_ostream &S) const; + const std::vector > &getRanges() const { + return Ranges; + } + void print(const char *ProgName, raw_ostream &S) const; }; } // end llvm namespace diff --git a/contrib/llvm/include/llvm/Support/StreamableMemoryObject.h b/contrib/llvm/include/llvm/Support/StreamableMemoryObject.h new file mode 100644 index 000000000000..531dbb216d7a --- /dev/null +++ b/contrib/llvm/include/llvm/Support/StreamableMemoryObject.h @@ -0,0 +1,181 @@ +//===- StreamableMemoryObject.h - Streamable data interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef STREAMABLEMEMORYOBJECT_H_ +#define STREAMABLEMEMORYOBJECT_H_ + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/DataStream.h" +#include + +namespace llvm { + +/// StreamableMemoryObject - Interface to data which might be streamed. +/// Streamability has 2 important implications/restrictions. First, the data +/// might not yet exist in memory when the request is made. This just means +/// that readByte/readBytes might have to block or do some work to get it. +/// More significantly, the exact size of the object might not be known until +/// it has all been fetched. This means that to return the right result, +/// getExtent must also wait for all the data to arrive; therefore it should +/// not be called on objects which are actually streamed (this would defeat +/// the purpose of streaming). Instead, isValidAddress and isObjectEnd can be +/// used to test addresses without knowing the exact size of the stream. 
+/// Finally, getPointer can be used instead of readBytes to avoid extra copying. +class StreamableMemoryObject : public MemoryObject { + public: + /// Destructor - Override as necessary. + virtual ~StreamableMemoryObject(); + + /// getBase - Returns the lowest valid address in the region. + /// + /// @result - The lowest valid address. + virtual uint64_t getBase() const = 0; + + /// getExtent - Returns the size of the region in bytes. (The region is + /// contiguous, so the highest valid address of the region + /// is getBase() + getExtent() - 1). + /// May block until all bytes in the stream have been read + /// + /// @result - The size of the region. + virtual uint64_t getExtent() const = 0; + + /// readByte - Tries to read a single byte from the region. + /// May block until (address - base) bytes have been read + /// @param address - The address of the byte, in the same space as getBase(). + /// @param ptr - A pointer to a byte to be filled in. Must be non-NULL. + /// @result - 0 if successful; -1 if not. Failure may be due to a + /// bounds violation or an implementation-specific error. + virtual int readByte(uint64_t address, uint8_t* ptr) const = 0; + + /// readBytes - Tries to read a contiguous range of bytes from the + /// region, up to the end of the region. + /// May block until (address - base + size) bytes have + /// been read. Additionally, StreamableMemoryObjects will + /// not do partial reads - if size bytes cannot be read, + /// readBytes will fail. + /// + /// @param address - The address of the first byte, in the same space as + /// getBase(). + /// @param size - The maximum number of bytes to copy. + /// @param buf - A pointer to a buffer to be filled in. Must be non-NULL + /// and large enough to hold size bytes. + /// @param copied - A pointer to a nunber that is filled in with the number + /// of bytes actually read. May be NULL. + /// @result - 0 if successful; -1 if not. Failure may be due to a + /// bounds violation or an implementation-specific error. + virtual int readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) const = 0; + + /// getPointer - Ensures that the requested data is in memory, and returns + /// A pointer to it. More efficient than using readBytes if the + /// data is already in memory. + /// May block until (address - base + size) bytes have been read + /// @param address - address of the byte, in the same space as getBase() + /// @param size - amount of data that must be available on return + /// @result - valid pointer to the requested data + virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const = 0; + + /// isValidAddress - Returns true if the address is within the object + /// (i.e. between base and base + extent - 1 inclusive) + /// May block until (address - base) bytes have been read + /// @param address - address of the byte, in the same space as getBase() + /// @result - true if the address may be read with readByte() + virtual bool isValidAddress(uint64_t address) const = 0; + + /// isObjectEnd - Returns true if the address is one past the end of the + /// object (i.e. if it is equal to base + extent) + /// May block until (address - base) bytes have been read + /// @param address - address of the byte, in the same space as getBase() + /// @result - true if the address is equal to base + extent + virtual bool isObjectEnd(uint64_t address) const = 0; +}; + +/// StreamingMemoryObject - interface to data which is actually streamed from +/// a DataStreamer. 
In addition to inherited members, it has the +/// dropLeadingBytes and setKnownObjectSize methods which are not applicable +/// to non-streamed objects. +class StreamingMemoryObject : public StreamableMemoryObject { +public: + StreamingMemoryObject(DataStreamer *streamer); + virtual uint64_t getBase() const { return 0; } + virtual uint64_t getExtent() const; + virtual int readByte(uint64_t address, uint8_t* ptr) const; + virtual int readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) const ; + virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const { + // This could be fixed by ensuring the bytes are fetched and making a copy, + // requiring that the bitcode size be known, or otherwise ensuring that + // the memory doesn't go away/get reallocated, but it's + // not currently necessary. Users that need the pointer don't stream. + assert(0 && "getPointer in streaming memory objects not allowed"); + return NULL; + } + virtual bool isValidAddress(uint64_t address) const; + virtual bool isObjectEnd(uint64_t address) const; + + /// Drop s bytes from the front of the stream, pushing the positions of the + /// remaining bytes down by s. This is used to skip past the bitcode header, + /// since we don't know a priori if it's present, and we can't put bytes + /// back into the stream once we've read them. + bool dropLeadingBytes(size_t s); + + /// If the data object size is known in advance, many of the operations can + /// be made more efficient, so this method should be called before reading + /// starts (although it can be called anytime). + void setKnownObjectSize(size_t size); + +private: + const static uint32_t kChunkSize = 4096 * 4; + mutable std::vector Bytes; + OwningPtr Streamer; + mutable size_t BytesRead; // Bytes read from stream + size_t BytesSkipped;// Bytes skipped at start of stream (e.g. wrapper/header) + mutable size_t ObjectSize; // 0 if unknown, set if wrapper seen or EOF reached + mutable bool EOFReached; + + // Fetch enough bytes such that Pos can be read or EOF is reached + // (i.e. BytesRead > Pos). Return true if Pos can be read. + // Unlike most of the functions in BitcodeReader, returns true on success. 
+ // Most of the requests will be small, but we fetch at kChunkSize bytes + // at a time to avoid making too many potentially expensive GetBytes calls + bool fetchToPos(size_t Pos) const { + if (EOFReached) return Pos < ObjectSize; + while (Pos >= BytesRead) { + Bytes.resize(BytesRead + BytesSkipped + kChunkSize); + size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped], + kChunkSize); + BytesRead += bytes; + if (bytes < kChunkSize) { + if (ObjectSize && BytesRead < Pos) + assert(0 && "Unexpected short read fetching bitcode"); + if (BytesRead <= Pos) { // reached EOF/ran out of bytes + ObjectSize = BytesRead; + EOFReached = true; + return false; + } + } + } + return true; + } + + StreamingMemoryObject(const StreamingMemoryObject&); // DO NOT IMPLEMENT + void operator=(const StreamingMemoryObject&); // DO NOT IMPLEMENT +}; + +StreamableMemoryObject *getNonStreamedMemoryObject( + const unsigned char *Start, const unsigned char *End); + +} +#endif // STREAMABLEMEMORYOBJECT_H_ diff --git a/contrib/llvm/include/llvm/Support/TargetRegistry.h b/contrib/llvm/include/llvm/Support/TargetRegistry.h index 45f249d7ed0e..88081307ac6b 100644 --- a/contrib/llvm/include/llvm/Support/TargetRegistry.h +++ b/contrib/llvm/include/llvm/Support/TargetRegistry.h @@ -44,12 +44,14 @@ namespace llvm { class MCTargetAsmLexer; class MCTargetAsmParser; class TargetMachine; + class TargetOptions; class raw_ostream; class formatted_raw_ostream; MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, @@ -73,7 +75,8 @@ namespace llvm { StringRef TT); typedef MCCodeGenInfo *(*MCCodeGenInfoCtorFnTy)(StringRef TT, Reloc::Model RM, - CodeModel::Model CM); + CodeModel::Model CM, + CodeGenOpt::Level OL); typedef MCInstrInfo *(*MCInstrInfoCtorFnTy)(void); typedef MCInstrAnalysis *(*MCInstrAnalysisCtorFnTy)(const MCInstrInfo*Info); typedef MCRegisterInfo *(*MCRegInfoCtorFnTy)(StringRef TT); @@ -84,8 +87,10 @@ namespace llvm { StringRef TT, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM); + CodeModel::Model CM, + CodeGenOpt::Level OL); typedef AsmPrinter *(*AsmPrinterCtorTy)(TargetMachine &TM, MCStreamer &Streamer); typedef MCAsmBackend *(*MCAsmBackendCtorTy)(const Target &T, StringRef TT); @@ -99,6 +104,8 @@ namespace llvm { typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); typedef MCCodeEmitter *(*MCCodeEmitterCtorTy)(const MCInstrInfo &II, const MCSubtargetInfo &STI, @@ -116,6 +123,7 @@ namespace llvm { bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, @@ -143,8 +151,8 @@ namespace llvm { /// registered. MCAsmInfoCtorFnTy MCAsmInfoCtorFn; - /// MCCodeGenInfoCtorFn - Constructor function for this target's MCCodeGenInfo, - /// if registered. + /// MCCodeGenInfoCtorFn - Constructor function for this target's + /// MCCodeGenInfo, if registered. MCCodeGenInfoCtorFnTy MCCodeGenInfoCtorFn; /// MCInstrInfoCtorFn - Constructor function for this target's MCInstrInfo, @@ -275,10 +283,11 @@ namespace llvm { /// createMCCodeGenInfo - Create a MCCodeGenInfo implementation. 
/// MCCodeGenInfo *createMCCodeGenInfo(StringRef Triple, Reloc::Model RM, - CodeModel::Model CM) const { + CodeModel::Model CM, + CodeGenOpt::Level OL) const { if (!MCCodeGenInfoCtorFn) return 0; - return MCCodeGenInfoCtorFn(Triple, RM, CM); + return MCCodeGenInfoCtorFn(Triple, RM, CM, OL); } /// createMCInstrInfo - Create a MCInstrInfo implementation. @@ -329,12 +338,14 @@ namespace llvm { /// either the target triple from the module, or the target triple of the /// host if that does not exist. TargetMachine *createTargetMachine(StringRef Triple, StringRef CPU, - StringRef Features, - Reloc::Model RM = Reloc::Default, - CodeModel::Model CM = CodeModel::Default) const { + StringRef Features, const TargetOptions &Options, + Reloc::Model RM = Reloc::Default, + CodeModel::Model CM = CodeModel::Default, + CodeGenOpt::Level OL = CodeGenOpt::Default) const { if (!TargetMachineCtorFn) return 0; - return TargetMachineCtorFn(*this, Triple, CPU, Features, RM, CM); + return TargetMachineCtorFn(*this, Triple, CPU, Features, Options, + RM, CM, OL); } /// createMCAsmBackend - Create a target specific assembly parser. @@ -383,10 +394,12 @@ namespace llvm { MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) const { if (!MCInstPrinterCtorFn) return 0; - return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, STI); + return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, MII, MRI, STI); } @@ -426,13 +439,14 @@ namespace llvm { bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) const { // AsmStreamerCtorFn is default to llvm::createAsmStreamer return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc, useCFI, - InstPrint, CE, TAB, ShowInst); + useDwarfDirectory, InstPrint, CE, TAB, ShowInst); } /// @} @@ -776,7 +790,7 @@ namespace llvm { /// extern "C" void LLVMInitializeFooTargetInfo() { /// RegisterTarget X(TheFooTarget, "foo", "Foo description"); /// } - template struct RegisterTarget { RegisterTarget(Target &T, const char *Name, const char *Desc) { @@ -840,8 +854,8 @@ namespace llvm { TargetRegistry::RegisterMCCodeGenInfo(T, &Allocator); } private: - static MCCodeGenInfo *Allocator(StringRef TT, - Reloc::Model RM, CodeModel::Model CM) { + static MCCodeGenInfo *Allocator(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL) { return new MCCodeGenInfoImpl(); } }; @@ -1010,9 +1024,11 @@ namespace llvm { private: static TargetMachine *Allocator(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM) { - return new TargetMachineImpl(T, TT, CPU, FS, RM, CM); + CodeModel::Model CM, + CodeGenOpt::Level OL) { + return new TargetMachineImpl(T, TT, CPU, FS, Options, RM, CM, OL); } }; diff --git a/contrib/llvm/include/llvm/Support/TargetSelect.h b/contrib/llvm/include/llvm/Support/TargetSelect.h index 83ff68caaeac..a86e953f00ea 100644 --- a/contrib/llvm/include/llvm/Support/TargetSelect.h +++ b/contrib/llvm/include/llvm/Support/TargetSelect.h @@ -149,6 +149,18 @@ namespace llvm { #endif } + /// InitializeNativeTargetDisassembler - The main program should call + /// this function to initialize the native target disassembler. + inline bool InitializeNativeTargetDisassembler() { + // If we have a native target, initialize the corresponding disassembler. 
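Aside: the TargetRegistry changes above thread a TargetOptions object and a CodeGenOpt::Level through the MC and TargetMachine constructor hooks. A minimal sketch of how a front end would call the new createTargetMachine() signature follows; the triple handling, "generic" CPU, and empty feature string are illustrative assumptions, not taken from this change.

    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/TargetSelect.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetOptions.h"
    #include <string>

    // Build a TargetMachine using the widened constructor signature.
    llvm::TargetMachine *createTM(const std::string &Triple) {
      llvm::InitializeAllTargetInfos();
      llvm::InitializeAllTargets();
      llvm::InitializeAllTargetMCs();

      std::string Error;
      const llvm::Target *T = llvm::TargetRegistry::lookupTarget(Triple, Error);
      if (!T)
        return 0; // Error holds the reason the lookup failed.

      llvm::TargetOptions Options; // codegen flags now travel in an object
      return T->createTargetMachine(Triple, "generic", "", Options,
                                    llvm::Reloc::Default,
                                    llvm::CodeModel::Default,
                                    llvm::CodeGenOpt::Default);
    }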
+#ifdef LLVM_NATIVE_DISASSEMBLER + LLVM_NATIVE_DISASSEMBLER(); + return false; +#else + return true; +#endif + } + } #endif diff --git a/contrib/llvm/include/llvm/Support/Valgrind.h b/contrib/llvm/include/llvm/Support/Valgrind.h index 7662eaaff5a9..e14764703932 100644 --- a/contrib/llvm/include/llvm/Support/Valgrind.h +++ b/contrib/llvm/include/llvm/Support/Valgrind.h @@ -16,8 +16,23 @@ #ifndef LLVM_SYSTEM_VALGRIND_H #define LLVM_SYSTEM_VALGRIND_H +#include "llvm/Support/Compiler.h" +#include "llvm/Config/llvm-config.h" #include +#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) +// tsan (Thread Sanitizer) is a valgrind-based tool that detects these exact +// functions by name. +extern "C" { +LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line, + const volatile void *cv); +LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line, + const volatile void *cv); +LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line); +LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line); +} +#endif + namespace llvm { namespace sys { // True if Valgrind is controlling this process. @@ -26,6 +41,34 @@ namespace sys { // Discard valgrind's translation of code in the range [Addr .. Addr + Len). // Otherwise valgrind may continue to execute the old version of the code. void ValgrindDiscardTranslations(const void *Addr, size_t Len); + +#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) + // Thread Sanitizer is a valgrind tool that finds races in code. + // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . + + // This marker is used to define a happens-before arc. The race detector will + // infer an arc from the begin to the end when they share the same pointer + // argument. + #define TsanHappensBefore(cv) \ + AnnotateHappensBefore(__FILE__, __LINE__, cv) + + // This marker defines the destination of a happens-before arc. + #define TsanHappensAfter(cv) \ + AnnotateHappensAfter(__FILE__, __LINE__, cv) + + // Ignore any races on writes between here and the next TsanIgnoreWritesEnd. + #define TsanIgnoreWritesBegin() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + // Resume checking for racy writes. + #define TsanIgnoreWritesEnd() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) +#else + #define TsanHappensBefore(cv) + #define TsanHappensAfter(cv) + #define TsanIgnoreWritesBegin() + #define TsanIgnoreWritesEnd() +#endif } } diff --git a/contrib/llvm/include/llvm/Support/ValueHandle.h b/contrib/llvm/include/llvm/Support/ValueHandle.h index c0cdc35e99bf..b7210b2063ea 100644 --- a/contrib/llvm/include/llvm/Support/ValueHandle.h +++ b/contrib/llvm/include/llvm/Support/ValueHandle.h @@ -49,52 +49,61 @@ class ValueHandleBase { Tracking, Weak }; -private: +private: PointerIntPair PrevPair; ValueHandleBase *Next; - Value *VP; + + // A subclass may want to store some information along with the value + // pointer. Allow them to do this by making the value pointer a pointer-int + // pair. The 'setValPtrInt' and 'getValPtrInt' methods below give them this + // access. + PointerIntPair VP; explicit ValueHandleBase(const ValueHandleBase&); // DO NOT IMPLEMENT. 
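Aside: the Valgrind.h change above makes TsanHappensBefore/TsanHappensAfter expand to the weak Annotate* hooks when threads and assertions are enabled, and to nothing otherwise. A short sketch of annotating a hand-rolled publication protocol follows; the Payload struct and the publish/consume names are illustrative assumptions, and real code would still need an actual memory barrier.

    #include "llvm/Support/Valgrind.h"

    // The writer fills Data, then sets Ready.  The annotations tell a Thread
    // Sanitizer run that the flag write happens-before the read that observes
    // it, so the access to Data is not reported as a race.
    struct Payload {
      int Data;
      volatile bool Ready;
    };

    void publish(Payload &P, int Value) {
      P.Data = Value;
      TsanHappensBefore(&P.Ready); // arc source, keyed on &P.Ready
      P.Ready = true;
    }

    bool consume(Payload &P, int &Out) {
      if (!P.Ready)
        return false;
      TsanHappensAfter(&P.Ready);  // arc destination, same key
      Out = P.Data;
      return true;
    }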
public: explicit ValueHandleBase(HandleBaseKind Kind) - : PrevPair(0, Kind), Next(0), VP(0) {} + : PrevPair(0, Kind), Next(0), VP(0, 0) {} ValueHandleBase(HandleBaseKind Kind, Value *V) - : PrevPair(0, Kind), Next(0), VP(V) { - if (isValid(VP)) + : PrevPair(0, Kind), Next(0), VP(V, 0) { + if (isValid(VP.getPointer())) AddToUseList(); } ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) : PrevPair(0, Kind), Next(0), VP(RHS.VP) { - if (isValid(VP)) + if (isValid(VP.getPointer())) AddToExistingUseList(RHS.getPrevPtr()); } ~ValueHandleBase() { - if (isValid(VP)) + if (isValid(VP.getPointer())) RemoveFromUseList(); } Value *operator=(Value *RHS) { - if (VP == RHS) return RHS; - if (isValid(VP)) RemoveFromUseList(); - VP = RHS; - if (isValid(VP)) AddToUseList(); + if (VP.getPointer() == RHS) return RHS; + if (isValid(VP.getPointer())) RemoveFromUseList(); + VP.setPointer(RHS); + if (isValid(VP.getPointer())) AddToUseList(); return RHS; } Value *operator=(const ValueHandleBase &RHS) { - if (VP == RHS.VP) return RHS.VP; - if (isValid(VP)) RemoveFromUseList(); - VP = RHS.VP; - if (isValid(VP)) AddToExistingUseList(RHS.getPrevPtr()); - return VP; + if (VP.getPointer() == RHS.VP.getPointer()) return RHS.VP.getPointer(); + if (isValid(VP.getPointer())) RemoveFromUseList(); + VP.setPointer(RHS.VP.getPointer()); + if (isValid(VP.getPointer())) AddToExistingUseList(RHS.getPrevPtr()); + return VP.getPointer(); } Value *operator->() const { return getValPtr(); } Value &operator*() const { return *getValPtr(); } protected: - Value *getValPtr() const { return VP; } + Value *getValPtr() const { return VP.getPointer(); } + + void setValPtrInt(unsigned K) { VP.setInt(K); } + unsigned getValPtrInt() const { return VP.getInt(); } + static bool isValid(Value *V) { return V && V != DenseMapInfo::getEmptyKey() && diff --git a/contrib/llvm/include/llvm/Support/YAMLParser.h b/contrib/llvm/include/llvm/Support/YAMLParser.h new file mode 100644 index 000000000000..b24cacd3c328 --- /dev/null +++ b/contrib/llvm/include/llvm/Support/YAMLParser.h @@ -0,0 +1,549 @@ +//===--- YAMLParser.h - Simple YAML parser --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a YAML 1.2 parser. +// +// See http://www.yaml.org/spec/1.2/spec.html for the full standard. +// +// This currently does not implement the following: +// * Multi-line literal folding. +// * Tag resolution. +// * UTF-16. +// * BOMs anywhere other than the first Unicode scalar value in the file. +// +// The most important class here is Stream. This represents a YAML stream with +// 0, 1, or many documents. +// +// SourceMgr sm; +// StringRef input = getInput(); +// yaml::Stream stream(input, sm); +// +// for (yaml::document_iterator di = stream.begin(), de = stream.end(); +// di != de; ++di) { +// yaml::Node *n = di->getRoot(); +// if (n) { +// // Do something with n... 
+// } else +// break; +// } +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_YAML_PARSER_H +#define LLVM_SUPPORT_YAML_PARSER_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/SMLoc.h" + +#include +#include + +namespace llvm { +class MemoryBuffer; +class SourceMgr; +class raw_ostream; +class Twine; + +namespace yaml { + +class document_iterator; +class Document; +class Node; +class Scanner; +struct Token; + +/// @brief Dump all the tokens in this stream to OS. +/// @returns true if there was an error, false otherwise. +bool dumpTokens(StringRef Input, raw_ostream &); + +/// @brief Scans all tokens in input without outputting anything. This is used +/// for benchmarking the tokenizer. +/// @returns true if there was an error, false otherwise. +bool scanTokens(StringRef Input); + +/// @brief Escape \a Input for a double quoted scalar. +std::string escape(StringRef Input); + +/// @brief This class represents a YAML stream potentially containing multiple +/// documents. +class Stream { +public: + Stream(StringRef Input, SourceMgr &); + ~Stream(); + + document_iterator begin(); + document_iterator end(); + void skip(); + bool failed(); + bool validate() { + skip(); + return !failed(); + } + + void printError(Node *N, const Twine &Msg); + +private: + OwningPtr scanner; + OwningPtr CurrentDoc; + + friend class Document; + + /// @brief Validate a %YAML x.x directive. + void handleYAMLDirective(const Token &); +}; + +/// @brief Abstract base class for all Nodes. +class Node { +public: + enum NodeKind { + NK_Null, + NK_Scalar, + NK_KeyValue, + NK_Mapping, + NK_Sequence, + NK_Alias + }; + + Node(unsigned int Type, OwningPtr&, StringRef Anchor); + + /// @brief Get the value of the anchor attached to this node. If it does not + /// have one, getAnchor().size() will be 0. + StringRef getAnchor() const { return Anchor; } + + SMRange getSourceRange() const { return SourceRange; } + void setSourceRange(SMRange SR) { SourceRange = SR; } + + // These functions forward to Document and Scanner. + Token &peekNext(); + Token getNext(); + Node *parseBlockNode(); + BumpPtrAllocator &getAllocator(); + void setError(const Twine &Message, Token &Location) const; + bool failed() const; + + virtual void skip() {}; + + unsigned int getType() const { return TypeID; } + static inline bool classof(const Node *) { return true; } + + void *operator new ( size_t Size + , BumpPtrAllocator &Alloc + , size_t Alignment = 16) throw() { + return Alloc.Allocate(Size, Alignment); + } + + void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { + Alloc.Deallocate(Ptr); + } + +protected: + OwningPtr &Doc; + SMRange SourceRange; + + void operator delete(void *) throw() {} + + virtual ~Node() {} + +private: + unsigned int TypeID; + StringRef Anchor; +}; + +/// @brief A null value. +/// +/// Example: +/// !!null null +class NullNode : public Node { +public: + NullNode(OwningPtr &D) : Node(NK_Null, D, StringRef()) {} + + static inline bool classof(const NullNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Null; + } +}; + +/// @brief A scalar node is an opaque datum that can be presented as a +/// series of zero or more Unicode scalar values. 
+/// +/// Example: +/// Adena +class ScalarNode : public Node { +public: + ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Val) + : Node(NK_Scalar, D, Anchor) + , Value(Val) { + SMLoc Start = SMLoc::getFromPointer(Val.begin()); + SMLoc End = SMLoc::getFromPointer(Val.end() - 1); + SourceRange = SMRange(Start, End); + } + + // Return Value without any escaping or folding or other fun YAML stuff. This + // is the exact bytes that are contained in the file (after conversion to + // utf8). + StringRef getRawValue() const { return Value; } + + /// @brief Gets the value of this node as a StringRef. + /// + /// @param Storage is used to store the content of the returned StringRef iff + /// it requires any modification from how it appeared in the source. + /// This happens with escaped characters and multi-line literals. + StringRef getValue(SmallVectorImpl &Storage) const; + + static inline bool classof(const ScalarNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Scalar; + } + +private: + StringRef Value; + + StringRef unescapeDoubleQuoted( StringRef UnquotedValue + , StringRef::size_type Start + , SmallVectorImpl &Storage) const; +}; + +/// @brief A key and value pair. While not technically a Node under the YAML +/// representation graph, it is easier to treat them this way. +/// +/// TODO: Consider making this not a child of Node. +/// +/// Example: +/// Section: .text +class KeyValueNode : public Node { +public: + KeyValueNode(OwningPtr &D) + : Node(NK_KeyValue, D, StringRef()) + , Key(0) + , Value(0) + {} + + /// @brief Parse and return the key. + /// + /// This may be called multiple times. + /// + /// @returns The key, or nullptr if failed() == true. + Node *getKey(); + + /// @brief Parse and return the value. + /// + /// This may be called multiple times. + /// + /// @returns The value, or nullptr if failed() == true. + Node *getValue(); + + virtual void skip() { + getKey()->skip(); + getValue()->skip(); + } + + static inline bool classof(const KeyValueNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_KeyValue; + } + +private: + Node *Key; + Node *Value; +}; + +/// @brief This is an iterator abstraction over YAML collections shared by both +/// sequences and maps. +/// +/// BaseT must have a ValueT* member named CurrentEntry and a member function +/// increment() which must set CurrentEntry to 0 to create an end iterator. +template +class basic_collection_iterator + : public std::iterator { +public: + basic_collection_iterator() : Base(0) {} + basic_collection_iterator(BaseT *B) : Base(B) {} + + ValueT *operator ->() const { + assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); + return Base->CurrentEntry; + } + + ValueT &operator *() const { + assert(Base && Base->CurrentEntry && + "Attempted to dereference end iterator!"); + return *Base->CurrentEntry; + } + + operator ValueT*() const { + assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); + return Base->CurrentEntry; + } + + bool operator !=(const basic_collection_iterator &Other) const { + if(Base != Other.Base) + return true; + return (Base && Other.Base) && Base->CurrentEntry + != Other.Base->CurrentEntry; + } + + basic_collection_iterator &operator++() { + assert(Base && "Attempted to advance iterator past end!"); + Base->increment(); + // Create an end iterator. 
+ if (Base->CurrentEntry == 0) + Base = 0; + return *this; + } + +private: + BaseT *Base; +}; + +// The following two templates are used for both MappingNode and Sequence Node. +template +typename CollectionType::iterator begin(CollectionType &C) { + assert(C.IsAtBeginning && "You may only iterate over a collection once!"); + C.IsAtBeginning = false; + typename CollectionType::iterator ret(&C); + ++ret; + return ret; +} + +template +void skip(CollectionType &C) { + // TODO: support skipping from the middle of a parsed collection ;/ + assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); + if (C.IsAtBeginning) + for (typename CollectionType::iterator i = begin(C), e = C.end(); + i != e; ++i) + i->skip(); +} + +/// @brief Represents a YAML map created from either a block map for a flow map. +/// +/// This parses the YAML stream as increment() is called. +/// +/// Example: +/// Name: _main +/// Scope: Global +class MappingNode : public Node { +public: + enum MappingType { + MT_Block, + MT_Flow, + MT_Inline //< An inline mapping node is used for "[key: value]". + }; + + MappingNode(OwningPtr &D, StringRef Anchor, MappingType MT) + : Node(NK_Mapping, D, Anchor) + , Type(MT) + , IsAtBeginning(true) + , IsAtEnd(false) + , CurrentEntry(0) + {} + + friend class basic_collection_iterator; + typedef basic_collection_iterator iterator; + template friend typename T::iterator yaml::begin(T &); + template friend void yaml::skip(T &); + + iterator begin() { + return yaml::begin(*this); + } + + iterator end() { return iterator(); } + + virtual void skip() { + yaml::skip(*this); + } + + static inline bool classof(const MappingNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Mapping; + } + +private: + MappingType Type; + bool IsAtBeginning; + bool IsAtEnd; + KeyValueNode *CurrentEntry; + + void increment(); +}; + +/// @brief Represents a YAML sequence created from either a block sequence for a +/// flow sequence. +/// +/// This parses the YAML stream as increment() is called. +/// +/// Example: +/// - Hello +/// - World +class SequenceNode : public Node { +public: + enum SequenceType { + ST_Block, + ST_Flow, + // Use for: + // + // key: + // - val1 + // - val2 + // + // As a BlockMappingEntry and BlockEnd are not created in this case. + ST_Indentless + }; + + SequenceNode(OwningPtr &D, StringRef Anchor, SequenceType ST) + : Node(NK_Sequence, D, Anchor) + , SeqType(ST) + , IsAtBeginning(true) + , IsAtEnd(false) + , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. + , CurrentEntry(0) + {} + + friend class basic_collection_iterator; + typedef basic_collection_iterator iterator; + template friend typename T::iterator yaml::begin(T &); + template friend void yaml::skip(T &); + + void increment(); + + iterator begin() { + return yaml::begin(*this); + } + + iterator end() { return iterator(); } + + virtual void skip() { + yaml::skip(*this); + } + + static inline bool classof(const SequenceNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Sequence; + } + +private: + SequenceType SeqType; + bool IsAtBeginning; + bool IsAtEnd; + bool WasPreviousTokenFlowEntry; + Node *CurrentEntry; +}; + +/// @brief Represents an alias to a Node with an anchor. 
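Aside: a minimal sketch of driving the YAML classes declared above, walking the root mapping of the first document and printing its scalar pairs. The dumpRootMapping name and the expectation that the root node is a mapping are illustrative assumptions, not part of this import.

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Casting.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/YAMLParser.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Print "key -> value" for every scalar pair in the first document's root
    // mapping, e.g. for input "Name: _main\nScope: Global\n".
    void dumpRootMapping(StringRef Input) {
      SourceMgr SM;
      yaml::Stream YS(Input, SM);
      for (yaml::document_iterator DI = YS.begin(), DE = YS.end();
           DI != DE; ++DI) {
        yaml::MappingNode *Map =
            dyn_cast_or_null<yaml::MappingNode>(DI->getRoot());
        if (!Map)
          break;
        // A collection may only be iterated once; increment() drives the
        // parser as the iterator advances.
        for (yaml::MappingNode::iterator KV = Map->begin(), KVE = Map->end();
             KV != KVE; ++KV) {
          yaml::ScalarNode *K = dyn_cast<yaml::ScalarNode>(KV->getKey());
          yaml::ScalarNode *V = dyn_cast<yaml::ScalarNode>(KV->getValue());
          if (!K || !V)
            continue;
          SmallString<32> KStorage, VStorage;
          outs() << K->getValue(KStorage) << " -> "
                 << V->getValue(VStorage) << "\n";
        }
        break; // only the first document
      }
    }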
+/// +/// Example: +/// *AnchorName +class AliasNode : public Node { +public: + AliasNode(OwningPtr &D, StringRef Val) + : Node(NK_Alias, D, StringRef()), Name(Val) {} + + StringRef getName() const { return Name; } + Node *getTarget(); + + static inline bool classof(const ScalarNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Alias; + } + +private: + StringRef Name; +}; + +/// @brief A YAML Stream is a sequence of Documents. A document contains a root +/// node. +class Document { +public: + /// @brief Root for parsing a node. Returns a single node. + Node *parseBlockNode(); + + Document(Stream &ParentStream); + + /// @brief Finish parsing the current document and return true if there are + /// more. Return false otherwise. + bool skip(); + + /// @brief Parse and return the root level node. + Node *getRoot() { + if (Root) + return Root; + return Root = parseBlockNode(); + } + +private: + friend class Node; + friend class document_iterator; + + /// @brief Stream to read tokens from. + Stream &stream; + + /// @brief Used to allocate nodes to. All are destroyed without calling their + /// destructor when the document is destroyed. + BumpPtrAllocator NodeAllocator; + + /// @brief The root node. Used to support skipping a partially parsed + /// document. + Node *Root; + + Token &peekNext(); + Token getNext(); + void setError(const Twine &Message, Token &Location) const; + bool failed() const; + + void handleTagDirective(const Token &Tag) { + // TODO: Track tags. + } + + /// @brief Parse %BLAH directives and return true if any were encountered. + bool parseDirectives(); + + /// @brief Consume the next token and error if it is not \a TK. + bool expectToken(int TK); +}; + +/// @brief Iterator abstraction for Documents over a Stream. 
+class document_iterator { +public: + document_iterator() : Doc(NullDoc) {} + document_iterator(OwningPtr &D) : Doc(D) {} + + bool operator !=(const document_iterator &Other) { + return Doc != Other.Doc; + } + + document_iterator operator ++() { + if (!Doc->skip()) { + Doc.reset(0); + } else { + Stream &S = Doc->stream; + Doc.reset(new Document(S)); + } + return *this; + } + + Document &operator *() { + return *Doc; + } + + OwningPtr &operator ->() { + return Doc; + } + +private: + static OwningPtr NullDoc; + OwningPtr &Doc; +}; + +} +} + +#endif diff --git a/contrib/llvm/include/llvm/Support/system_error.h b/contrib/llvm/include/llvm/Support/system_error.h index 2c15b69c9420..af812069b9fe 100644 --- a/contrib/llvm/include/llvm/Support/system_error.h +++ b/contrib/llvm/include/llvm/Support/system_error.h @@ -470,17 +470,6 @@ template <> struct hash; namespace llvm { -template -struct integral_constant { - typedef T value_type; - static const value_type value = v; - typedef integral_constant type; - operator value_type() { return value; } -}; - -typedef integral_constant true_type; -typedef integral_constant false_type; - // is_error_code_enum template struct is_error_code_enum : public false_type {}; @@ -738,6 +727,10 @@ class error_code { public: error_code() : _val_(0), _cat_(&system_category()) {} + static error_code success() { + return error_code(); + } + error_code(int _val, const error_category& _cat) : _val_(_val), _cat_(&_cat) {} diff --git a/contrib/llvm/include/llvm/Support/type_traits.h b/contrib/llvm/include/llvm/Support/type_traits.h index 515295bdd66f..a3a551f851f3 100644 --- a/contrib/llvm/include/llvm/Support/type_traits.h +++ b/contrib/llvm/include/llvm/Support/type_traits.h @@ -17,6 +17,8 @@ #ifndef LLVM_SUPPORT_TYPE_TRAITS_H #define LLVM_SUPPORT_TYPE_TRAITS_H +#include "llvm/Support/DataTypes.h" +#include #include // This is actually the conforming implementation which works with abstract @@ -64,22 +66,99 @@ struct isPodLike { // std::pair's are pod-like if their elements are. template struct isPodLike > { - static const bool value = isPodLike::value & isPodLike::value; + static const bool value = isPodLike::value && isPodLike::value; }; +template +struct integral_constant { + typedef T value_type; + static const value_type value = v; + typedef integral_constant type; + operator value_type() { return value; } +}; + +typedef integral_constant true_type; +typedef integral_constant false_type; + /// \brief Metafunction that determines whether the two given types are /// equivalent. -template -struct is_same { - static const bool value = false; +template struct is_same : public false_type {}; +template struct is_same : public true_type {}; + +/// \brief Metafunction that removes const qualification from a type. +template struct remove_const { typedef T type; }; +template struct remove_const { typedef T type; }; + +/// \brief Metafunction that removes volatile qualification from a type. +template struct remove_volatile { typedef T type; }; +template struct remove_volatile { typedef T type; }; + +/// \brief Metafunction that removes both const and volatile qualification from +/// a type. +template struct remove_cv { + typedef typename remove_const::type>::type type; }; -template -struct is_same { - static const bool value = true; +/// \brief Helper to implement is_integral metafunction. 
+template struct is_integral_impl : false_type {}; +template <> struct is_integral_impl< bool> : true_type {}; +template <> struct is_integral_impl< char> : true_type {}; +template <> struct is_integral_impl< signed char> : true_type {}; +template <> struct is_integral_impl : true_type {}; +template <> struct is_integral_impl< wchar_t> : true_type {}; +template <> struct is_integral_impl< short> : true_type {}; +template <> struct is_integral_impl : true_type {}; +template <> struct is_integral_impl< int> : true_type {}; +template <> struct is_integral_impl : true_type {}; +template <> struct is_integral_impl< long> : true_type {}; +template <> struct is_integral_impl : true_type {}; +template <> struct is_integral_impl< long long> : true_type {}; +template <> struct is_integral_impl : true_type {}; + +/// \brief Metafunction that determines whether the given type is an integral +/// type. +template +struct is_integral : is_integral_impl {}; + +/// \brief Metafunction to remove reference from a type. +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; + +/// \brief Metafunction that determines whether the given type is a pointer +/// type. +template struct is_pointer : false_type {}; +template struct is_pointer : true_type {}; +template struct is_pointer : true_type {}; +template struct is_pointer : true_type {}; +template struct is_pointer : true_type {}; + +/// \brief Metafunction that determines whether the given type is either an +/// integral type or an enumeration type. +/// +/// Note that this accepts potentially more integral types than we whitelist +/// above for is_integral because it is based on merely being convertible +/// implicitly to an integral type. +template class is_integral_or_enum { + // Provide an overload which can be called with anything implicitly + // convertible to an unsigned long long. This should catch integer types and + // enumeration types at least. We blacklist classes with conversion operators + // below. 
+ static double check_int_convertible(unsigned long long); + static char check_int_convertible(...); + + typedef typename remove_reference::type UnderlyingT; + static UnderlyingT &nonce_instance; + +public: + enum { + value = (!is_class::value && !is_pointer::value && + !is_same::value && + !is_same::value && + sizeof(char) != sizeof(check_int_convertible(nonce_instance))) + }; }; - + // enable_if_c - Enable/disable a template based on a metafunction template struct enable_if_c { diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h index afce76099867..5e68c10a47ac 100644 --- a/contrib/llvm/include/llvm/TableGen/Record.h +++ b/contrib/llvm/include/llvm/TableGen/Record.h @@ -20,6 +20,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -32,7 +33,6 @@ class BitsRecTy; class IntRecTy; class StringRecTy; class ListRecTy; -class CodeRecTy; class DagRecTy; class RecordRecTy; @@ -43,7 +43,6 @@ class BitInit; class BitsInit; class IntInit; class StringInit; -class CodeInit; class ListInit; class UnOpInit; class BinOpInit; @@ -68,6 +67,7 @@ class RecordKeeper; class RecTy { ListRecTy *ListTy; + virtual void anchor(); public: RecTy() : ListTy(0) {} virtual ~RecTy() {} @@ -99,7 +99,6 @@ class RecTy { virtual Init *convertValue( TernOpInit *UI) { return convertValue((TypedInit*)UI); } - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( DagInit *DI) { return 0; } @@ -119,7 +118,6 @@ class RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return false; } virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } }; @@ -144,7 +142,6 @@ class BitRecTy : public RecTy { virtual Init *convertValue( IntInit *II); virtual Init *convertValue(StringInit *SI) { return 0; } virtual Init *convertValue( ListInit *LI) { return 0; } - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return (Init*)VB; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( DagInit *DI) { return 0; } @@ -165,7 +162,6 @@ class BitRecTy : public RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return true; } virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } @@ -189,7 +185,6 @@ class BitsRecTy : public RecTy { virtual Init *convertValue( IntInit *II); virtual Init *convertValue(StringInit *SI) { return 0; } virtual Init *convertValue( ListInit *LI) { return 0; } - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( DagInit *DI) { return 0; } @@ -212,7 
+207,6 @@ class BitsRecTy : public RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return true; } virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } @@ -233,7 +227,6 @@ class IntRecTy : public RecTy { virtual Init *convertValue( IntInit *II) { return (Init*)II; } virtual Init *convertValue(StringInit *SI) { return 0; } virtual Init *convertValue( ListInit *LI) { return 0; } - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( DagInit *DI) { return 0; } @@ -255,7 +248,6 @@ class IntRecTy : public RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return true; } virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } @@ -279,7 +271,6 @@ class StringRecTy : public RecTy { virtual Init *convertValue( BinOpInit *BO); virtual Init *convertValue( TernOpInit *BO) { return RecTy::convertValue(BO);} - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( DagInit *DI) { return 0; } @@ -298,7 +289,6 @@ class StringRecTy : public RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return false; } virtual bool baseClassOf(const StringRecTy *RHS) const { return true; } virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } }; @@ -322,7 +312,6 @@ class ListRecTy : public RecTy { virtual Init *convertValue( IntInit *II) { return 0; } virtual Init *convertValue(StringInit *SI) { return 0; } virtual Init *convertValue( ListInit *LI); - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( DagInit *DI) { return 0; } @@ -346,47 +335,6 @@ class ListRecTy : public RecTy { virtual bool baseClassOf(const ListRecTy *RHS) const { return RHS->getElementType()->typeIsConvertibleTo(Ty); } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } - virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } - virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } -}; - -/// CodeRecTy - 'code' - Represent an code fragment, function or method. 
-/// -class CodeRecTy : public RecTy { - static CodeRecTy Shared; - CodeRecTy() {} -public: - static CodeRecTy *get() { return &Shared; } - - virtual Init *convertValue( UnsetInit *UI) { return (Init*)UI; } - virtual Init *convertValue( BitInit *BI) { return 0; } - virtual Init *convertValue( BitsInit *BI) { return 0; } - virtual Init *convertValue( IntInit *II) { return 0; } - virtual Init *convertValue(StringInit *SI) { return 0; } - virtual Init *convertValue( ListInit *LI) { return 0; } - virtual Init *convertValue( CodeInit *CI) { return (Init*)CI; } - virtual Init *convertValue(VarBitInit *VB) { return 0; } - virtual Init *convertValue( DefInit *DI) { return 0; } - virtual Init *convertValue( DagInit *DI) { return 0; } - virtual Init *convertValue( UnOpInit *UI) { return RecTy::convertValue(UI);} - virtual Init *convertValue( BinOpInit *UI) { return RecTy::convertValue(UI);} - virtual Init *convertValue( TernOpInit *UI) { return RecTy::convertValue(UI);} - virtual Init *convertValue( TypedInit *TI); - virtual Init *convertValue( VarInit *VI) { return RecTy::convertValue(VI);} - virtual Init *convertValue( FieldInit *FI) { return RecTy::convertValue(FI);} - - std::string getAsString() const { return "code"; } - - bool typeIsConvertibleTo(const RecTy *RHS) const { - return RHS->baseClassOf(this); - } - virtual bool baseClassOf(const BitRecTy *RHS) const { return false; } - virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; } - virtual bool baseClassOf(const IntRecTy *RHS) const { return false; } - virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } - virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return true; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } }; @@ -405,7 +353,6 @@ class DagRecTy : public RecTy { virtual Init *convertValue( IntInit *II) { return 0; } virtual Init *convertValue(StringInit *SI) { return 0; } virtual Init *convertValue( ListInit *LI) { return 0; } - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( DefInit *DI) { return 0; } virtual Init *convertValue( UnOpInit *BO); @@ -427,7 +374,6 @@ class DagRecTy : public RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return false; } virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } virtual bool baseClassOf(const ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return true; } virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; } }; @@ -451,7 +397,6 @@ class RecordRecTy : public RecTy { virtual Init *convertValue( IntInit *II) { return 0; } virtual Init *convertValue(StringInit *SI) { return 0; } virtual Init *convertValue( ListInit *LI) { return 0; } - virtual Init *convertValue( CodeInit *CI) { return 0; } virtual Init *convertValue(VarBitInit *VB) { return 0; } virtual Init *convertValue( UnOpInit *UI) { return RecTy::convertValue(UI);} virtual Init *convertValue( BinOpInit *UI) { return RecTy::convertValue(UI);} @@ -472,7 +417,6 @@ class RecordRecTy : public RecTy { virtual bool baseClassOf(const IntRecTy *RHS) const { return false; } virtual bool baseClassOf(const StringRecTy *RHS) const { return false; } virtual bool baseClassOf(const 
ListRecTy *RHS) const { return false; } - virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; } virtual bool baseClassOf(const DagRecTy *RHS) const { return false; } virtual bool baseClassOf(const RecordRecTy *RHS) const; }; @@ -489,6 +433,7 @@ RecTy *resolveTypes(RecTy *T1, RecTy *T2); class Init { Init(const Init &); // Do not define. Init &operator=(const Init &); // Do not define. + virtual void anchor(); protected: Init(void) {} @@ -617,6 +562,7 @@ class UnsetInit : public Init { UnsetInit() : Init() {} UnsetInit(const UnsetInit &); // Do not define. UnsetInit &operator=(const UnsetInit &Other); // Do not define. + virtual void anchor(); public: static UnsetInit *get(); @@ -638,6 +584,7 @@ class BitInit : public Init { explicit BitInit(bool V) : Value(V) {} BitInit(const BitInit &Other); // Do not define. BitInit &operator=(BitInit &Other); // Do not define. + virtual void anchor(); public: static BitInit *get(bool V); @@ -725,8 +672,7 @@ class IntInit : public TypedInit { /// virtual Init *resolveBitReference(Record &R, const RecordVal *RV, unsigned Bit) const { - assert(0 && "Illegal bit reference off int"); - return 0; + llvm_unreachable("Illegal bit reference off int"); } /// resolveListElementReference - This method is used to implement @@ -734,8 +680,7 @@ class IntInit : public TypedInit { /// now, we return the resolved value, otherwise we return null. virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { - assert(0 && "Illegal element reference off int"); - return 0; + llvm_unreachable("Illegal element reference off int"); } }; @@ -750,9 +695,10 @@ class StringInit : public TypedInit { StringInit(const StringInit &Other); // Do not define. StringInit &operator=(const StringInit &Other); // Do not define. + virtual void anchor(); public: - static StringInit *get(const std::string &V); + static StringInit *get(StringRef); const std::string &getValue() const { return Value; } @@ -769,8 +715,7 @@ class StringInit : public TypedInit { /// virtual Init *resolveBitReference(Record &R, const RecordVal *RV, unsigned Bit) const { - assert(0 && "Illegal bit reference off string"); - return 0; + llvm_unreachable("Illegal bit reference off string"); } /// resolveListElementReference - This method is used to implement @@ -778,33 +723,10 @@ class StringInit : public TypedInit { /// now, we return the resolved value, otherwise we return null. virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { - assert(0 && "Illegal element reference off string"); - return 0; + llvm_unreachable("Illegal element reference off string"); } }; -/// CodeInit - "[{...}]" - Represent a code fragment. -/// -class CodeInit : public Init { - std::string Value; - - explicit CodeInit(const std::string &V) : Value(V) {} - - CodeInit(const CodeInit &Other); // Do not define. - CodeInit &operator=(const CodeInit &Other); // Do not define. 
- -public: - static CodeInit *get(const std::string &V); - - const std::string &getValue() const { return Value; } - - virtual Init *convertInitializerTo(RecTy *Ty) const { - return Ty->convertValue(const_cast(this)); - } - - virtual std::string getAsString() const { return "[{" + Value + "}]"; } -}; - /// ListInit - [AL, AH, CL] - Represent a list of defs /// class ListInit : public TypedInit, public FoldingSetNode { @@ -861,8 +783,7 @@ class ListInit : public TypedInit, public FoldingSetNode { /// virtual Init *resolveBitReference(Record &R, const RecordVal *RV, unsigned Bit) const { - assert(0 && "Illegal bit reference off list"); - return 0; + llvm_unreachable("Illegal bit reference off list"); } /// resolveListElementReference - This method is used to implement @@ -1058,9 +979,11 @@ class TernOpInit : public OpInit { /// VarInit - 'Opcode' - Represent a reference to an entire variable object. /// class VarInit : public TypedInit { - std::string VarName; + Init *VarName; explicit VarInit(const std::string &VN, RecTy *T) + : TypedInit(T), VarName(StringInit::get(VN)) {} + explicit VarInit(Init *VN, RecTy *T) : TypedInit(T), VarName(VN) {} VarInit(const VarInit &Other); // Do not define. @@ -1074,7 +997,11 @@ class VarInit : public TypedInit { return Ty->convertValue(const_cast(this)); } - const std::string &getName() const { return VarName; } + const std::string &getName() const; + Init *getNameInit() const { return VarName; } + std::string getNameInitAsString() const { + return getNameInit()->getAsUnquotedString(); + } virtual Init *resolveBitReference(Record &R, const RecordVal *RV, unsigned Bit) const; @@ -1092,7 +1019,7 @@ class VarInit : public TypedInit { /// virtual Init *resolveReferences(Record &R, const RecordVal *RV) const; - virtual std::string getAsString() const { return VarName; } + virtual std::string getAsString() const { return getName(); } }; @@ -1201,8 +1128,7 @@ class DefInit : public TypedInit { /// virtual Init *resolveBitReference(Record &R, const RecordVal *RV, unsigned Bit) const { - assert(0 && "Illegal bit reference off def"); - return 0; + llvm_unreachable("Illegal bit reference off def"); } /// resolveListElementReference - This method is used to implement @@ -1210,8 +1136,7 @@ class DefInit : public TypedInit { /// now, we return the resolved value, otherwise we return null. 
virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { - assert(0 && "Illegal element reference off def"); - return 0; + llvm_unreachable("Illegal element reference off def"); } }; @@ -1320,14 +1245,12 @@ class DagInit : public TypedInit, public FoldingSetNode { virtual Init *resolveBitReference(Record &R, const RecordVal *RV, unsigned Bit) const { - assert(0 && "Illegal bit reference off dag"); - return 0; + llvm_unreachable("Illegal bit reference off dag"); } virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { - assert(0 && "Illegal element reference off dag"); - return 0; + llvm_unreachable("Illegal element reference off dag"); } }; @@ -1345,6 +1268,10 @@ class RecordVal { RecordVal(const std::string &N, RecTy *T, unsigned P); const std::string &getName() const; + const Init *getNameInit() const { return Name; } + std::string getNameInitAsString() const { + return getNameInit()->getAsUnquotedString(); + } unsigned getPrefix() const { return Prefix; } RecTy *getType() const { return Ty; } @@ -1375,7 +1302,7 @@ class Record { unsigned ID; Init *Name; SMLoc Loc; - std::vector TemplateArgs; + std::vector TemplateArgs; std::vector Values; std::vector SuperClasses; @@ -1384,13 +1311,21 @@ class Record { DefInit *TheInit; + void init(); void checkName(); public: // Constructs a record. explicit Record(const std::string &N, SMLoc loc, RecordKeeper &records) : - ID(LastID++), Name(StringInit::get(N)), Loc(loc), TrackedRecords(records), TheInit(0) {} + ID(LastID++), Name(StringInit::get(N)), Loc(loc), TrackedRecords(records), + TheInit(0) { + init(); + } + explicit Record(Init *N, SMLoc loc, RecordKeeper &records) : + ID(LastID++), Name(N), Loc(loc), TrackedRecords(records), TheInit(0) { + init(); + } ~Record() {} @@ -1400,6 +1335,13 @@ class Record { unsigned getID() const { return ID; } const std::string &getName() const; + Init *getNameInit() const { + return Name; + } + const std::string getNameInitAsString() const { + return getNameInit()->getAsUnquotedString(); + } + void setName(Init *Name); // Also updates RecordKeeper. void setName(const std::string &Name); // Also updates RecordKeeper. @@ -1408,46 +1350,69 @@ class Record { /// get the corresponding DefInit. 
DefInit *getDefInit(); - const std::vector &getTemplateArgs() const { + const std::vector &getTemplateArgs() const { return TemplateArgs; } const std::vector &getValues() const { return Values; } const std::vector &getSuperClasses() const { return SuperClasses; } - bool isTemplateArg(StringRef Name) const { + bool isTemplateArg(Init *Name) const { for (unsigned i = 0, e = TemplateArgs.size(); i != e; ++i) if (TemplateArgs[i] == Name) return true; return false; } + bool isTemplateArg(StringRef Name) const { + return isTemplateArg(StringInit::get(Name.str())); + } - const RecordVal *getValue(StringRef Name) const { + const RecordVal *getValue(const Init *Name) const { for (unsigned i = 0, e = Values.size(); i != e; ++i) - if (Values[i].getName() == Name) return &Values[i]; + if (Values[i].getNameInit() == Name) return &Values[i]; + return 0; + } + const RecordVal *getValue(StringRef Name) const { + return getValue(StringInit::get(Name)); + } + RecordVal *getValue(const Init *Name) { + for (unsigned i = 0, e = Values.size(); i != e; ++i) + if (Values[i].getNameInit() == Name) return &Values[i]; return 0; } RecordVal *getValue(StringRef Name) { - for (unsigned i = 0, e = Values.size(); i != e; ++i) - if (Values[i].getName() == Name) return &Values[i]; - return 0; + return getValue(StringInit::get(Name)); } - void addTemplateArg(StringRef Name) { + void addTemplateArg(Init *Name) { assert(!isTemplateArg(Name) && "Template arg already defined!"); TemplateArgs.push_back(Name); } - - void addValue(const RecordVal &RV) { - assert(getValue(RV.getName()) == 0 && "Value already added!"); - Values.push_back(RV); + void addTemplateArg(StringRef Name) { + addTemplateArg(StringInit::get(Name.str())); } - void removeValue(StringRef Name) { + void addValue(const RecordVal &RV) { + assert(getValue(RV.getNameInit()) == 0 && "Value already added!"); + Values.push_back(RV); + if (Values.size() > 1) + // Keep NAME at the end of the list. It makes record dumps a + // bit prettier and allows TableGen tests to be written more + // naturally. Tests can use CHECK-NEXT to look for Record + // fields they expect to see after a def. They can't do that if + // NAME is the first Record field. + std::swap(Values[Values.size() - 2], Values[Values.size() - 1]); + } + + void removeValue(Init *Name) { for (unsigned i = 0, e = Values.size(); i != e; ++i) - if (Values[i].getName() == Name) { + if (Values[i].getNameInit() == Name) { Values.erase(Values.begin()+i); return; } - assert(0 && "Cannot remove an entry that does not exist!"); + llvm_unreachable("Cannot remove an entry that does not exist!"); + } + + void removeValue(StringRef Name) { + removeValue(StringInit::get(Name.str())); } bool isSubClassOf(const Record *R) const { @@ -1459,7 +1424,7 @@ class Record { bool isSubClassOf(StringRef Name) const { for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i) - if (SuperClasses[i]->getName() == Name) + if (SuperClasses[i]->getNameInitAsString() == Name) return true; return false; } @@ -1553,12 +1518,6 @@ class Record { /// the value is not the right type. /// DagInit *getValueAsDag(StringRef FieldName) const; - - /// getValueAsCode - This method looks up the specified field and returns - /// its value as the string data in a CodeInit, throwing an exception if the - /// field does not exist or if the value is not a code object. 
- /// - std::string getValueAsCode(StringRef FieldName) const; }; raw_ostream &operator<<(raw_ostream &OS, const Record &R); @@ -1576,6 +1535,7 @@ struct MultiClass { class RecordKeeper { std::map Classes, Defs; + public: ~RecordKeeper() { for (std::map::iterator I = Classes.begin(), @@ -1598,12 +1558,12 @@ class RecordKeeper { return I == Defs.end() ? 0 : I->second; } void addClass(Record *R) { - assert(getClass(R->getName()) == 0 && "Class already exists!"); - Classes.insert(std::make_pair(R->getName(), R)); + assert(getClass(R->getNameInitAsString()) == 0 && "Class already exists!"); + Classes.insert(std::make_pair(R->getNameInitAsString(), R)); } void addDef(Record *R) { - assert(getDef(R->getName()) == 0 && "Def already exists!"); - Defs.insert(std::make_pair(R->getName(), R)); + assert(getDef(R->getNameInitAsString()) == 0 && "Def already exists!"); + Defs.insert(std::make_pair(R->getNameInitAsString(), R)); } /// removeClass - Remove, but do not delete, the specified record. @@ -1650,6 +1610,16 @@ struct LessRecordFieldName { raw_ostream &operator<<(raw_ostream &OS, const RecordKeeper &RK); +/// QualifyName - Return an Init with a qualifier prefix referring +/// to CurRec's name. +Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, + Init *Name, const std::string &Scoper); + +/// QualifyName - Return an Init with a qualifier prefix referring +/// to CurRec's name. +Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, + const std::string &Name, const std::string &Scoper); + } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/TableGen/TableGenAction.h b/contrib/llvm/include/llvm/TableGen/TableGenAction.h index 9f1c23c5b457..733ae626447c 100644 --- a/contrib/llvm/include/llvm/TableGen/TableGenAction.h +++ b/contrib/llvm/include/llvm/TableGen/TableGenAction.h @@ -21,6 +21,7 @@ class raw_ostream; class RecordKeeper; class TableGenAction { + virtual void anchor(); public: virtual ~TableGenAction() {} diff --git a/contrib/llvm/include/llvm/TableGen/TableGenBackend.h b/contrib/llvm/include/llvm/TableGen/TableGenBackend.h index 853f92e406fb..3ebcd92d0e48 100644 --- a/contrib/llvm/include/llvm/TableGen/TableGenBackend.h +++ b/contrib/llvm/include/llvm/TableGen/TableGenBackend.h @@ -16,7 +16,6 @@ #define LLVM_TABLEGEN_TABLEGENBACKEND_H #include "llvm/Support/raw_ostream.h" -#include namespace llvm { @@ -24,6 +23,7 @@ class Record; class RecordKeeper; struct TableGenBackend { + virtual void anchor(); virtual ~TableGenBackend() {} // run - All TableGen backends should implement the run method, which should @@ -34,7 +34,7 @@ struct TableGenBackend { public: // Useful helper routines... /// EmitSourceFileHeader - Output a LLVM style file header to the specified /// ostream. 
- void EmitSourceFileHeader(const std::string &Desc, raw_ostream &OS) const; + void EmitSourceFileHeader(StringRef Desc, raw_ostream &OS) const; }; diff --git a/contrib/llvm/include/llvm/Target/Mangler.h b/contrib/llvm/include/llvm/Target/Mangler.h index c1c118b08cab..d5e165e58b91 100644 --- a/contrib/llvm/include/llvm/Target/Mangler.h +++ b/contrib/llvm/include/llvm/Target/Mangler.h @@ -17,11 +17,9 @@ #include "llvm/ADT/DenseMap.h" namespace llvm { -class StringRef; class Twine; -class Value; class GlobalValue; -template class SmallVectorImpl; +template class SmallVectorImpl; class MCContext; class MCSymbol; class TargetData; diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td index aa9a4f5af18c..fa1ec5594522 100644 --- a/contrib/llvm/include/llvm/Target/Target.td +++ b/contrib/llvm/include/llvm/Target/Target.td @@ -22,8 +22,12 @@ include "llvm/Intrinsics.td" class RegisterClass; // Forward def // SubRegIndex - Use instances of SubRegIndex to identify subregisters. -class SubRegIndex { +class SubRegIndex comps = []> { string Namespace = ""; + + // ComposedOf - A list of two SubRegIndex instances, [A, B]. + // This indicates that this SubRegIndex is the result of composing A and B. + list ComposedOf = comps; } // RegAltNameIndex - The alternate name set to use for register operands of @@ -83,9 +87,15 @@ class Register altNames = []> { // CostPerUse - Additional cost of instructions using this register compared // to other registers in its class. The register allocator will try to // minimize the number of instructions using a register with a CostPerUse. - // This is used by the x86-64 and ARM Thumb targets where some registers + // This is used by the x86-64 and ARM Thumb targets where some registers // require larger instruction encodings. int CostPerUse = 0; + + // CoveredBySubRegs - When this bit is set, the value of this register is + // completely determined by the value of its sub-registers. For example, the + // x86 register AX is covered by its sub-registers AL and AH, but EAX is not + // covered by its sub-register AX. + bit CoveredBySubRegs = 0; } // RegisterWithSubRegs - This can be used to define instances of Register which @@ -194,12 +204,15 @@ class RegisterClass regTypes, int alignment, // // (decimate GPR, 2) - Pick every N'th element, starting with the first. // +// (interleave A, B, ...) - Interleave the elements from each argument list. +// // All of these operators work on ordered sets, not lists. That means // duplicates are removed from sub-expressions. // Set operators. The rest is defined in TargetSelectionDAG.td. def sequence; def decimate; +def interleave; // RegisterTuples - Automatically generate super-registers by forming tuples of // sub-registers. This is useful for modeling register sequence constraints @@ -356,6 +369,15 @@ class Instruction { // associated with them. Once we've migrated all of them over to true // pseudo-instructions that are lowered to real instructions prior to // the printer/emitter, we can remove this attribute and just use isPseudo. + // + // The intended use is: + // isPseudo: Does not have encoding information and should be expanded, + // at the latest, during lowering to MCInst. + // + // isCodeGenOnly: Does have encoding information and can go through to the + // CodeEmitter unchanged, but duplicates a canonical instruction + // definition's encoding and should be ignored when constructing the + // assembler match tables. 
bit isCodeGenOnly = 0; // Is this instruction a pseudo instruction for use by the assembler parser. @@ -414,7 +436,7 @@ class Predicate { /// NoHonorSignDependentRounding - This predicate is true if support for /// sign-dependent-rounding is not enabled. def NoHonorSignDependentRounding - : Predicate<"!HonorSignDependentRoundingFPMath()">; + : Predicate<"!TM.Options.HonorSignDependentRoundingFPMath()">; class Requires preds> { list Predicates = preds; @@ -679,6 +701,11 @@ def COPY : Instruction { let neverHasSideEffects = 1; let isAsCheapAsAMove = 1; } +def BUNDLE : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = "BUNDLE"; +} } //===----------------------------------------------------------------------===// @@ -698,7 +725,15 @@ class AsmParser { // function of the AsmParser class to call on every matched instruction. // This can be used to perform target specific instruction post-processing. string AsmParserInstCleanup = ""; +} +def DefaultAsmParser : AsmParser; +//===----------------------------------------------------------------------===// +// AsmParserVariant - Subtargets can have multiple different assembly parsers +// (e.g. AT&T vs Intel syntax on X86 for example). This class can be +// implemented by targets to describe such variants. +// +class AsmParserVariant { // Variant - AsmParsers can be of multiple different variants. Variants are // used to support targets that need to parser multiple formats for the // assembly language. @@ -715,7 +750,7 @@ class AsmParser { // purposes of matching. string RegisterPrefix = ""; } -def DefaultAsmParser : AsmParser; +def DefaultAsmParserVariant : AsmParserVariant; /// AssemblerPredicate - This is a Predicate that can be used when the assembler /// matches instructions and aliases. @@ -724,7 +759,20 @@ class AssemblerPredicate { string AssemblerCondString = cond; } - +/// TokenAlias - This class allows targets to define assembler token +/// operand aliases. That is, a token literal operand which is equivalent +/// to another, canonical, token literal. For example, ARM allows: +/// vmov.u32 s4, #0 -> vmov.i32, #0 +/// 'u32' is a more specific designator for the 32-bit integer type specifier +/// and is legal for any instruction which accepts 'i32' as a datatype suffix. +/// def : TokenAlias<".u32", ".i32">; +/// +/// This works by marking the match class of 'From' as a subclass of the +/// match class of 'To'. +class TokenAlias { + string FromToken = From; + string ToToken = To; +} /// MnemonicAlias - This class allows targets to define assembler mnemonic /// aliases. This should be used when all forms of one mnemonic are accepted @@ -813,6 +861,10 @@ class Target { // AssemblyParsers - The AsmParser instances available for this target. list AssemblyParsers = [DefaultAsmParser]; + /// AssemblyParserVariants - The AsmParserVariant instances available for + /// this target. + list AssemblyParserVariants = [DefaultAsmParserVariant]; + // AssemblyWriters - The AsmWriter instances available for this target. 
list AssemblyWriters = [DefaultAsmWriter]; } diff --git a/contrib/llvm/include/llvm/Target/TargetCallingConv.h b/contrib/llvm/include/llvm/Target/TargetCallingConv.h index 275957e01532..a6251e7d3345 100644 --- a/contrib/llvm/include/llvm/Target/TargetCallingConv.h +++ b/contrib/llvm/include/llvm/Target/TargetCallingConv.h @@ -14,6 +14,10 @@ #ifndef LLVM_TARGET_TARGETCALLINGCONV_H #define LLVM_TARGET_TARGETCALLINGCONV_H +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include + namespace llvm { namespace ISD { diff --git a/contrib/llvm/include/llvm/Target/TargetCallingConv.td b/contrib/llvm/include/llvm/Target/TargetCallingConv.td index 6da3ba13bb35..a53ed29f1ec1 100644 --- a/contrib/llvm/include/llvm/Target/TargetCallingConv.td +++ b/contrib/llvm/include/llvm/Target/TargetCallingConv.td @@ -133,3 +133,14 @@ class CCDelegateTo : CCAction { class CallingConv actions> { list Actions = actions; } + +/// CalleeSavedRegs - A list of callee saved registers for a given calling +/// convention. The order of registers is used by PrologEpilogInsertion when +/// allocation stack slots for saved registers. +/// +/// For each CalleeSavedRegs def, TableGen will emit a FOO_SaveList array for +/// returning from getCalleeSavedRegs(), and a FOO_RegMask bit mask suitable for +/// returning from getCallPreservedMask(). +class CalleeSavedRegs { + dag SaveList = saves; +} diff --git a/contrib/llvm/include/llvm/Target/TargetData.h b/contrib/llvm/include/llvm/Target/TargetData.h index 26fd1870ac39..d116f392fb31 100644 --- a/contrib/llvm/include/llvm/Target/TargetData.h +++ b/contrib/llvm/include/llvm/Target/TargetData.h @@ -44,7 +44,7 @@ enum AlignTypeEnum { AGGREGATE_ALIGN = 'a', ///< Aggregate alignment STACK_ALIGN = 's' ///< Stack objects alignment }; - + /// Target alignment element. /// /// Stores the alignment data associated with a given alignment type (pointer, @@ -80,7 +80,7 @@ class TargetData : public ImmutablePass { unsigned StackNaturalAlign; ///< Stack natural alignment SmallVector LegalIntWidths; ///< Legal Integers. - + /// Alignments- Where the primitive type alignment data is stored. /// /// @sa init(). @@ -88,7 +88,7 @@ class TargetData : public ImmutablePass { /// pointers vs. 64-bit pointers by extending TargetAlignment, but for now, /// we don't. SmallVector Alignments; - + /// InvalidAlignmentElem - This member is a signal that a requested alignment /// type and bit width were not found in the SmallVector. static const TargetAlignElem InvalidAlignmentElem; @@ -112,19 +112,30 @@ class TargetData : public ImmutablePass { return &align != &InvalidAlignmentElem; } + /// Initialise a TargetData object with default values, ensure that the + /// target data pass is registered. + void init(); + public: /// Default ctor. /// /// @note This has to exist, because this is a pass, but it should never be /// used. TargetData(); - + /// Constructs a TargetData from a specification string. See init(). explicit TargetData(StringRef TargetDescription) : ImmutablePass(ID) { - init(TargetDescription); + std::string errMsg = parseSpecifier(TargetDescription, this); + assert(errMsg == "" && "Invalid target data layout string."); + (void)errMsg; } + /// Parses a target data specification string. Returns an error message + /// if the string is malformed, or the empty string on success. Optionally + /// initialises a TargetData object if passed a non-null pointer. 
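The parseSpecifier interface documented above replaces the old init(StringRef) entry point: a malformed data-layout string now surfaces as a returned error message rather than being handled inside the constructor. A minimal, self-contained sketch of that error-string convention; the parseLayout name and its toy validation rules are illustrative assumptions, not the real parser:

#include <cctype>
#include <iostream>
#include <string>

// Toy stand-in for the parseSpecifier calling convention: validate a
// layout string and return an error message, or "" on success.
static std::string parseLayout(const std::string &Spec) {
  if (Spec.empty())
    return "empty target data layout string";
  for (char C : Spec)
    if (!std::isalnum(static_cast<unsigned char>(C)) && C != '-' && C != ':')
      return std::string("unexpected character '") + C + "' in layout string";
  return ""; // empty string signals success
}

int main() {
  for (const std::string S : {"e-p:64:64:64-i32:32", "bad layout!"}) {
    std::string Err = parseLayout(S);
    std::cout << S << " -> " << (Err.empty() ? "ok" : Err) << "\n";
  }
}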
+ static std::string parseSpecifier(StringRef TargetDescription, TargetData* td = 0); + /// Initialize target data from properties stored in the module. explicit TargetData(const Module *M); @@ -141,9 +152,6 @@ class TargetData : public ImmutablePass { ~TargetData(); // Not virtual, do not subclass this class - //! Parse a target data layout string and initialize TargetData alignments. - void init(StringRef TargetDescription); - /// Target endianness... bool isLittleEndian() const { return LittleEndian; } bool isBigEndian() const { return !LittleEndian; } @@ -152,7 +160,7 @@ class TargetData : public ImmutablePass { /// TargetData. This representation is in the same format accepted by the /// string constructor above. std::string getStringRepresentation() const; - + /// isLegalInteger - This function returns true if the specified type is /// known to be a native integer type supported by the CPU. For example, /// i64 is not native on most 32-bit CPUs and i37 is not native on any known @@ -166,7 +174,7 @@ class TargetData : public ImmutablePass { return true; return false; } - + bool isIllegalInteger(unsigned Width) const { return !isLegalInteger(Width); } @@ -251,11 +259,11 @@ class TargetData : public ImmutablePass { /// getABITypeAlignment - Return the minimum ABI-required alignment for the /// specified type. unsigned getABITypeAlignment(Type *Ty) const; - + /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for /// an integer type of the specified bitwidth. unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const; - + /// getCallFrameTypeAlignment - Return the minimum ABI-required alignment /// for the specified type when it is part of a call frame. @@ -305,7 +313,7 @@ class TargetData : public ImmutablePass { assert((Alignment & (Alignment-1)) == 0 && "Alignment must be power of 2!"); return (Val + (Alignment-1)) & ~UIntTy(Alignment-1); } - + static char ID; // Pass identification, replacement for typeid }; diff --git a/contrib/llvm/include/llvm/Target/TargetELFWriterInfo.h b/contrib/llvm/include/llvm/Target/TargetELFWriterInfo.h index b97f3e2f4d0f..114295e8f985 100644 --- a/contrib/llvm/include/llvm/Target/TargetELFWriterInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetELFWriterInfo.h @@ -15,9 +15,6 @@ #define LLVM_TARGET_TARGETELFWRITERINFO_H namespace llvm { - class Function; - class TargetData; - class TargetMachine; //===--------------------------------------------------------------------===// // TargetELFWriterInfo diff --git a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h index 4c759b2ccb9f..d56db7b5118e 100644 --- a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h +++ b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h @@ -15,8 +15,6 @@ #define LLVM_TARGET_TARGETFRAMELOWERING_H #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/ADT/ArrayRef.h" #include #include @@ -24,8 +22,6 @@ namespace llvm { class CalleeSavedInfo; class MachineFunction; - class MachineBasicBlock; - class MachineMove; class RegScavenger; /// Information about stack frame layout on the target. 
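The TargetData hunk above keeps the classic power-of-two round-up helper, (Val + (Alignment-1)) & ~(Alignment-1), guarded by an assertion that Alignment is a power of two. A small stand-alone sketch of the same bit trick for readers unfamiliar with it:

#include <cassert>
#include <cstdint>
#include <iostream>

// Round Val up to the next multiple of Alignment; Alignment must be a
// power of two, exactly as the assertion in the hunk above requires.
static uint64_t roundUpAlignment(uint64_t Val, uint64_t Alignment) {
  assert((Alignment & (Alignment - 1)) == 0 && "Alignment must be power of 2!");
  return (Val + (Alignment - 1)) & ~(Alignment - 1);
}

int main() {
  std::cout << roundUpAlignment(13, 8) << "\n";  // 16
  std::cout << roundUpAlignment(16, 8) << "\n";  // 16 (already aligned)
}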
It holds the direction diff --git a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h index 07f614d61d93..d1e380c5602a 100644 --- a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_TARGETINSTRINFO_H #include "llvm/MC/MCInstrInfo.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -278,8 +279,7 @@ class TargetInstrInfo : public MCInstrInfo { /// This is only invoked in cases where AnalyzeBranch returns success. It /// returns the number of instructions that were removed. virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const { - assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!"); - return 0; + llvm_unreachable("Target didn't implement TargetInstrInfo::RemoveBranch!"); } /// InsertBranch - Insert branch code into the end of the specified @@ -296,8 +296,7 @@ class TargetInstrInfo : public MCInstrInfo { MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const { - assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!"); - return 0; + llvm_unreachable("Target didn't implement TargetInstrInfo::InsertBranch!"); } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything @@ -353,12 +352,28 @@ class TargetInstrInfo : public MCInstrInfo { return false; } + /// isProfitableToUnpredicate - Return true if it's profitable to unpredicate + /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually + /// exclusive predicates. + /// e.g. + /// subeq r0, r1, #1 + /// addne r0, r1, #1 + /// => + /// sub r0, r1, #1 + /// addne r0, r1, #1 + /// + /// This may be profitable is conditional instructions are always executed. + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + /// copyPhysReg - Emit instructions to copy a pair of physical registers. virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - assert(0 && "Target didn't implement TargetInstrInfo::copyPhysReg!"); + llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!"); } /// storeRegToStackSlot - Store the specified register of the given register @@ -371,7 +386,8 @@ class TargetInstrInfo : public MCInstrInfo { unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!"); + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::storeRegToStackSlot!"); } /// loadRegFromStackSlot - Load the specified register of the given register @@ -383,7 +399,8 @@ class TargetInstrInfo : public MCInstrInfo { unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!"); + llvm_unreachable("Target didn't implement " + "TargetInstrInfo::loadRegFromStackSlot!"); } /// expandPostRAPseudo - This function is called for all pseudo instructions @@ -535,7 +552,7 @@ class TargetInstrInfo : public MCInstrInfo { /// isUnpredicatedTerminator - Returns true if the instruction is a /// terminator instruction that has not been predicated. 
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const = 0; /// PredicateInstruction - Convert the instruction into a predicated /// instruction. It returns true if the operation was successful. @@ -646,7 +663,16 @@ class TargetInstrInfo : public MCInstrInfo { virtual int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const; + SDNode *UseNode, unsigned UseIdx) const = 0; + + /// getOutputLatency - Compute and return the output dependency latency of a + /// a given pair of defs which both target the same register. This is usually + /// one. + virtual unsigned getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const { + return 1; + } /// getInstrLatency - Compute the instruction latency of a given instruction. /// If the instruction has higher cost when predicated, it's returned via @@ -656,7 +682,7 @@ class TargetInstrInfo : public MCInstrInfo { unsigned *PredCost = 0) const; virtual int getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const; + SDNode *Node) const = 0; /// isHighLatencyDef - Return true if this opcode has high latency to its /// result. @@ -718,6 +744,80 @@ class TargetInstrInfo : public MCInstrInfo { /// virtual void setExecutionDomain(MachineInstr *MI, unsigned Domain) const {} + + /// getPartialRegUpdateClearance - Returns the preferred minimum clearance + /// before an instruction with an unwanted partial register update. + /// + /// Some instructions only write part of a register, and implicitly need to + /// read the other parts of the register. This may cause unwanted stalls + /// preventing otherwise unrelated instructions from executing in parallel in + /// an out-of-order CPU. + /// + /// For example, the x86 instruction cvtsi2ss writes its result to bits + /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so + /// the instruction needs to wait for the old value of the register to become + /// available: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// cvtsi2ss %rbx, %xmm0 + /// + /// In the code above, the cvtsi2ss instruction needs to wait for the addps + /// instruction before it can issue, even though the high bits of %xmm0 + /// probably aren't needed. + /// + /// This hook returns the preferred clearance before MI, measured in + /// instructions. Other defs of MI's operand OpNum are avoided in the last N + /// instructions before MI. It should only return a positive value for + /// unwanted dependencies. If the old bits of the defined register have + /// useful values, or if MI is determined to otherwise read the dependency, + /// the hook should return 0. + /// + /// The unwanted dependency may be handled by: + /// + /// 1. Allocating the same register for an MI def and use. That makes the + /// unwanted dependency identical to a required dependency. + /// + /// 2. Allocating a register for the def that has no defs in the previous N + /// instructions. + /// + /// 3. Calling breakPartialRegDependency() with the same arguments. This + /// allows the target to insert a dependency breaking instruction. + /// + virtual unsigned + getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // The default implementation returns 0 for no partial register dependency. 
+ return 0; + } + + /// breakPartialRegDependency - Insert a dependency-breaking instruction + /// before MI to eliminate an unwanted dependency on OpNum. + /// + /// If it wasn't possible to avoid a def in the last N instructions before MI + /// (see getPartialRegUpdateClearance), this hook will be called to break the + /// unwanted dependency. + /// + /// On x86, an xorps instruction can be used as a dependency breaker: + /// + /// addps %xmm1, %xmm0 + /// movaps %xmm0, (%rax) + /// xorps %xmm0, %xmm0 + /// cvtsi2ss %rbx, %xmm0 + /// + /// An operand should be added to MI if an instruction was + /// inserted. This ties the instructions together in the post-ra scheduler. + /// + virtual void + breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const {} + + /// Create machine specific model for scheduling. + virtual DFAPacketizer* + CreateTargetScheduleState(const TargetMachine*, const ScheduleDAG*) const { + return NULL; + } + private: int CallFrameSetupOpcode, CallFrameDestroyOpcode; }; @@ -746,6 +846,7 @@ class TargetInstrInfoImpl : public TargetInstrInfo { virtual bool hasStoreToStackSlot(const MachineInstr *MI, const MachineMemOperand *&MMO, int &FrameIndex) const; + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; virtual bool PredicateInstruction(MachineInstr *MI, const SmallVectorImpl &Pred) const; virtual void reMaterialize(MachineBasicBlock &MBB, @@ -761,6 +862,13 @@ class TargetInstrInfoImpl : public TargetInstrInfo { virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const; + using TargetInstrInfo::getOperandLatency; + virtual int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const; + using TargetInstrInfo::getInstrLatency; + virtual int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const; bool usePreRAHazardRecognizer() const; diff --git a/contrib/llvm/include/llvm/Target/TargetJITInfo.h b/contrib/llvm/include/llvm/Target/TargetJITInfo.h index b198eb62f0c6..044afd9b7392 100644 --- a/contrib/llvm/include/llvm/Target/TargetJITInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetJITInfo.h @@ -17,9 +17,9 @@ #ifndef LLVM_TARGET_TARGETJITINFO_H #define LLVM_TARGET_TARGETJITINFO_H -#include #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DataTypes.h" +#include namespace llvm { class Function; @@ -30,6 +30,7 @@ namespace llvm { /// TargetJITInfo - Target specific information required by the Just-In-Time /// code generator. class TargetJITInfo { + virtual void anchor(); public: virtual ~TargetJITInfo() {} @@ -45,8 +46,8 @@ namespace llvm { /// ptr. virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE) { - assert(0 && "This target doesn't implement emitGlobalValueIndirectSym!"); - return 0; + llvm_unreachable("This target doesn't implement " + "emitGlobalValueIndirectSym!"); } /// Records the required size and alignment for a call stub in bytes. @@ -57,8 +58,6 @@ namespace llvm { /// Returns the maximum size and alignment for a call stub on this target. virtual StubLayout getStubLayout() { llvm_unreachable("This target doesn't implement getStubLayout!"); - StubLayout Result = {0, 0}; - return Result; } /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a @@ -68,15 +67,13 @@ namespace llvm { /// aligned from the address the JCE was set up to emit at. 
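The getPartialRegUpdateClearance / breakPartialRegDependency pair documented above amounts to a small protocol: the pass measures how many instructions ago the register was last defined, and only when that distance is smaller than the requested clearance does it ask the target to insert a dependency breaker such as xorps. A toy, self-contained sketch of that decision; the types, names, and clearance value below are illustrative assumptions, not LLVM's API:

#include <iostream>

// Toy model of the clearance protocol: the "target" reports how much
// clearance it wants before a partial-register write, and the pass breaks
// the dependency only when the last def of the register is closer than that.
struct ToyTarget {
  unsigned partialRegUpdateClearance() const { return 16; } // e.g. cvtsi2ss
  void breakPartialRegDependency() const {
    std::cout << "  insert xorps-style dependency breaker\n";
  }
};

static void maybeBreakDependency(const ToyTarget &TII, unsigned DistToLastDef) {
  unsigned Pref = TII.partialRegUpdateClearance();
  if (Pref && DistToLastDef < Pref)   // old register value is still "too hot"
    TII.breakPartialRegDependency();
  else
    std::cout << "  clearance satisfied, nothing to do\n";
}

int main() {
  ToyTarget TII;
  std::cout << "last def 3 instructions ago:\n";
  maybeBreakDependency(TII, 3);
  std::cout << "last def 40 instructions ago:\n";
  maybeBreakDependency(TII, 40);
}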
virtual void *emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE) { - assert(0 && "This target doesn't implement emitFunctionStub!"); - return 0; + llvm_unreachable("This target doesn't implement emitFunctionStub!"); } /// getPICJumpTableEntry - Returns the value of the jumptable entry for the /// specific basic block. virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) { - assert(0 && "This target doesn't implement getPICJumpTableEntry!"); - return 0; + llvm_unreachable("This target doesn't implement getPICJumpTableEntry!"); } /// LazyResolverFn - This typedef is used to represent the function that @@ -97,8 +94,7 @@ namespace llvm { /// function, and giving the JIT the target function used to do the lazy /// resolving. virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn) { - assert(0 && "Not implemented for this target!"); - return 0; + llvm_unreachable("Not implemented for this target!"); } /// relocate - Before the JIT can run a block of code that has been emitted, @@ -114,8 +110,7 @@ namespace llvm { /// handling thread local variables. This method returns a value only /// meaningful to the target. virtual char* allocateThreadLocalMemory(size_t size) { - assert(0 && "This target does not implement thread local storage!"); - return 0; + llvm_unreachable("This target does not implement thread local storage!"); } /// needsGOT - Allows a target to specify that it would like the diff --git a/contrib/llvm/include/llvm/Target/TargetLibraryInfo.h b/contrib/llvm/include/llvm/Target/TargetLibraryInfo.h index 02a1a3ca23a4..70e26bf3c5a4 100644 --- a/contrib/llvm/include/llvm/Target/TargetLibraryInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetLibraryInfo.h @@ -11,33 +11,204 @@ #define LLVM_TARGET_TARGETLIBRARYINFO_H #include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" namespace llvm { class Triple; namespace LibFunc { enum Func { - /// void *memset(void *b, int c, size_t len); - memset, - - // void *memcpy(void *s1, const void *s2, size_t n); - memcpy, - - // void *memmove(void *s1, const void *s2, size_t n); - memmove, - - /// void memset_pattern16(void *b, const void *pattern16, size_t len); - memset_pattern16, - - /// int iprintf(const char *format, ...); - iprintf, - - /// int siprintf(char *str, const char *format, ...); - siprintf, - + /// double acos(double x); + acos, + /// long double acosl(long double x); + acosl, + /// float acosf(float x); + acosf, + /// double asin(double x); + asin, + /// long double asinl(long double x); + asinl, + /// float asinf(float x); + asinf, + /// double atan(double x); + atan, + /// long double atanl(long double x); + atanl, + /// float atanf(float x); + atanf, + /// double atan2(double y, double x); + atan2, + /// long double atan2l(long double y, long double x); + atan2l, + /// float atan2f(float y, float x); + atan2f, + /// double ceil(double x); + ceil, + /// long double ceill(long double x); + ceill, + /// float ceilf(float x); + ceilf, + /// double copysign(double x, double y); + copysign, + /// float copysignf(float x, float y); + copysignf, + /// long double copysignl(long double x, long double y); + copysignl, + /// double cos(double x); + cos, + /// long double cosl(long double x); + cosl, + /// float cosf(float x); + cosf, + /// double cosh(double x); + cosh, + /// long double coshl(long double x); + coshl, + /// float coshf(float x); + coshf, + /// double exp(double x); + exp, + /// long double expl(long double x); + expl, + /// float expf(float x); + expf, + /// double exp2(double x); + exp2, 
+ /// long double exp2l(long double x); + exp2l, + /// float exp2f(float x); + exp2f, + /// double expm1(double x); + expm1, + /// long double expm1l(long double x); + expm1l, + /// float expm1f(float x); + expl1f, + /// double fabs(double x); + fabs, + /// long double fabsl(long double x); + fabsl, + /// float fabsf(float x); + fabsf, + /// double floor(double x); + floor, + /// long double floorl(long double x); + floorl, + /// float floorf(float x); + floorf, /// int fiprintf(FILE *stream, const char *format, ...); fiprintf, - + /// double fmod(double x, double y); + fmod, + /// long double fmodl(long double x, long double y); + fmodl, + /// float fmodf(float x, float y); + fmodf, + /// int fputs(const char *s, FILE *stream); + fputs, + /// size_t fwrite(const void *ptr, size_t size, size_t nitems, + /// FILE *stream); + fwrite, + /// int iprintf(const char *format, ...); + iprintf, + /// double log(double x); + log, + /// long double logl(long double x); + logl, + /// float logf(float x); + logf, + /// double log2(double x); + log2, + /// double long double log2l(long double x); + log2l, + /// float log2f(float x); + log2f, + /// double log10(double x); + log10, + /// long double log10l(long double x); + log10l, + /// float log10f(float x); + log10f, + /// double log1p(double x); + log1p, + /// long double log1pl(long double x); + log1pl, + /// float log1pf(float x); + log1pf, + /// void *memcpy(void *s1, const void *s2, size_t n); + memcpy, + /// void *memmove(void *s1, const void *s2, size_t n); + memmove, + /// void *memset(void *b, int c, size_t len); + memset, + /// void memset_pattern16(void *b, const void *pattern16, size_t len); + memset_pattern16, + /// double nearbyint(double x); + nearbyint, + /// float nearbyintf(float x); + nearbyintf, + /// long double nearbyintl(long double x); + nearbyintl, + /// double pow(double x, double y); + pow, + /// float powf(float x, float y); + powf, + /// long double powl(long double x, long double y); + powl, + /// double rint(double x); + rint, + /// float rintf(float x); + rintf, + /// long dobule rintl(long double x); + rintl, + /// double sin(double x); + sin, + /// long double sinl(long double x); + sinl, + /// float sinf(float x); + sinf, + /// double sinh(double x); + sinh, + /// long double sinhl(long double x); + sinhl, + /// float sinhf(float x); + sinhf, + /// int siprintf(char *str, const char *format, ...); + siprintf, + /// double sqrt(double x); + sqrt, + /// long double sqrtl(long double x); + sqrtl, + /// float sqrtf(float x); + sqrtf, + /// double tan(double x); + tan, + /// long double tanl(long double x); + tanl, + /// float tanf(float x); + tanf, + /// double tanh(double x); + tanh, + /// long double tanhl(long double x); + tanhl, + /// float tanhf(float x); + tanhf, + /// double trunc(double x); + trunc, + /// float truncf(float x); + truncf, + /// long double truncl(long double x); + truncl, + /// int __cxa_atexit(void (*f)(void *), void *p, void *d); + cxa_atexit, + /// void __cxa_guard_abort(guard_t *guard); + /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi. + cxa_guard_abort, + /// int __cxa_guard_acquire(guard_t *guard); + cxa_guard_acquire, + /// void __cxa_guard_release(guard_t *guard); + cxa_guard_release, + NumLibFuncs }; } @@ -46,7 +217,24 @@ namespace llvm { /// library functions are available for the current target, and allows a /// frontend to disable optimizations through -fno-builtin etc. 
class TargetLibraryInfo : public ImmutablePass { - unsigned char AvailableArray[(LibFunc::NumLibFuncs+7)/8]; + virtual void anchor(); + unsigned char AvailableArray[(LibFunc::NumLibFuncs+3)/4]; + llvm::DenseMap CustomNames; + static const char* StandardNames[LibFunc::NumLibFuncs]; + + enum AvailabilityState { + StandardName = 3, // (memset to all ones) + CustomName = 1, + Unavailable = 0 // (memset to all zeros) + }; + void setState(LibFunc::Func F, AvailabilityState State) { + AvailableArray[F/4] &= ~(3 << 2*(F&3)); + AvailableArray[F/4] |= State << 2*(F&3); + } + AvailabilityState getState(LibFunc::Func F) const { + return static_cast((AvailableArray[F/4] >> 2*(F&3)) & 3); + } + public: static char ID; TargetLibraryInfo(); @@ -56,19 +244,39 @@ class TargetLibraryInfo : public ImmutablePass { /// has - This function is used by optimizations that want to match on or form /// a given library function. bool has(LibFunc::Func F) const { - return (AvailableArray[F/8] & (1 << (F&7))) != 0; + return getState(F) != Unavailable; + } + + StringRef getName(LibFunc::Func F) const { + AvailabilityState State = getState(F); + if (State == Unavailable) + return StringRef(); + if (State == StandardName) + return StandardNames[F]; + assert(State == CustomName); + return CustomNames.find(F)->second; } /// setUnavailable - this can be used by whatever sets up TargetLibraryInfo to /// ban use of specific library functions. void setUnavailable(LibFunc::Func F) { - AvailableArray[F/8] &= ~(1 << (F&7)); + setState(F, Unavailable); } void setAvailable(LibFunc::Func F) { - AvailableArray[F/8] |= 1 << (F&7); + setState(F, StandardName); } - + + void setAvailableWithName(LibFunc::Func F, StringRef Name) { + if (StandardNames[F] != Name) { + setState(F, CustomName); + CustomNames[F] = Name; + assert(CustomNames.find(F) != CustomNames.end()); + } else { + setState(F, StandardName); + } + } + /// disableAllFunctions - This disables all builtins, which is used for /// options like -fno-builtin. void disableAllFunctions(); diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h index 013e70a05c03..720c9df99e2b 100644 --- a/contrib/llvm/include/llvm/Target/TargetLowering.h +++ b/contrib/llvm/include/llvm/Target/TargetLowering.h @@ -25,7 +25,6 @@ #include "llvm/CallingConv.h" #include "llvm/InlineAsm.h" #include "llvm/Attributes.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Support/DebugLoc.h" @@ -36,41 +35,34 @@ #include namespace llvm { - class AllocaInst; - class APFloat; class CallInst; class CCState; - class Function; class FastISel; class FunctionLoweringInfo; class ImmutableCallSite; + class IntrinsicInst; class MachineBasicBlock; class MachineFunction; - class MachineFrameInfo; class MachineInstr; class MachineJumpTableInfo; class MCContext; class MCExpr; - class SDNode; - class SDValue; - class SelectionDAG; template class SmallVectorImpl; class TargetData; - class TargetMachine; class TargetRegisterClass; class TargetLoweringObjectFile; class Value; - // FIXME: should this be here? - namespace TLSModel { - enum Model { - GeneralDynamic, - LocalDynamic, - InitialExec, - LocalExec + namespace Sched { + enum Preference { + None, // No preference + Source, // Follow source order. + RegPressure, // Scheduling for lowest register pressure. + Hybrid, // Scheduling for both latency and register pressure. + ILP, // Scheduling for ILP in low register pressure mode. 
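TargetLibraryInfo above now packs a two-bit AvailabilityState per library function (StandardName = 3, CustomName = 1, Unavailable = 0), four entries per byte, so a memset to all-ones marks every function standard and a memset to zero marks everything unavailable. A self-contained sketch of the same packing arithmetic with a toy function count (not the LLVM class itself):

#include <cstring>
#include <iostream>

enum State { Unavailable = 0, CustomName = 1, StandardName = 3 };

const unsigned NumFuncs = 10;
unsigned char Avail[(NumFuncs + 3) / 4]; // four two-bit states per byte

static void setState(unsigned F, State S) {
  Avail[F / 4] &= ~(3 << 2 * (F & 3)); // clear the two bits for F
  Avail[F / 4] |= S << 2 * (F & 3);    // then store the new state
}

static State getState(unsigned F) {
  return static_cast<State>((Avail[F / 4] >> 2 * (F & 3)) & 3);
}

int main() {
  std::memset(Avail, -1, sizeof(Avail)); // everything starts as StandardName
  setState(4, Unavailable);
  setState(7, CustomName);
  for (unsigned F = 0; F < NumFuncs; ++F)
    std::cout << "func " << F << " -> state " << getState(F) << "\n";
}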
+ VLIW // Scheduling for VLIW targets. }; } - TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc); //===----------------------------------------------------------------------===// @@ -94,7 +86,7 @@ class TargetLowering { Custom // Use the LowerOperation hook to implement custom lowering. }; - /// LegalizeAction - This enum indicates whether a types are legal for a + /// LegalizeTypeAction - This enum indicates whether a types are legal for a /// target, and if not, what action should be used to make them valid. enum LegalizeTypeAction { TypeLegal, // The target natively supports this type. @@ -115,8 +107,6 @@ class TargetLowering { static ISD::NodeType getExtendForContent(BooleanContent Content) { switch (Content) { - default: - assert(false && "Unknown BooleanContent!"); case UndefinedBooleanContent: // Extend by adding rubbish bits. return ISD::ANY_EXTEND; @@ -127,6 +117,7 @@ class TargetLowering { // Extend by copying the sign bit. return ISD::SIGN_EXTEND; } + llvm_unreachable("Invalid content kind"); } /// NOTE: The constructor takes ownership of TLOF. @@ -199,9 +190,9 @@ class TargetLowering { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual TargetRegisterClass *getRegClassFor(EVT VT) const { + virtual const TargetRegisterClass *getRegClassFor(EVT VT) const { assert(VT.isSimple() && "getRegClassFor called on illegal type!"); - TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; + const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; assert(RC && "This value type is not natively supported!"); return RC; } @@ -292,11 +283,9 @@ class TargetLowering { VT = getTypeToTransformTo(Context, VT); break; default: - assert(false && "Type is not legal nor is it to be expanded!"); - return VT; + llvm_unreachable("Type is not legal nor is it to be expanded!"); } } - return VT; } /// getVectorTypeBreakdown - Vector types are broken down into some number of @@ -520,8 +509,19 @@ class TargetLowering { /// AllowUnknown is true, this will return MVT::Other for types with no EVT /// counterpart (e.g. structs), otherwise it will assert. EVT getValueType(Type *Ty, bool AllowUnknown = false) const { - EVT VT = EVT::getEVT(Ty, AllowUnknown); - return VT == MVT::iPTR ? PointerTy : VT; + // Lower scalar pointers to native pointer types. + if (Ty->isPointerTy()) return PointerTy; + + if (Ty->isVectorTy()) { + VectorType *VTy = cast(Ty); + Type *Elm = VTy->getElementType(); + // Lower vectors of pointers to native pointer types. 
+ if (Elm->isPointerTy()) + Elm = EVT(PointerTy).getTypeForEVT(Ty->getContext()); + return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), + VTy->getNumElements()); + } + return EVT::getEVT(Ty, AllowUnknown); } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -554,8 +554,7 @@ class TargetLowering { if (VT.isInteger()) { return getRegisterType(Context, getTypeToTransformTo(Context, VT)); } - assert(0 && "Unsupported extended type!"); - return EVT(MVT::Other); // Not reached + llvm_unreachable("Unsupported extended type!"); } /// getNumRegisters - Return the number of registers that this ValueType will @@ -580,8 +579,7 @@ class TargetLowering { unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); return (BitWidth + RegWidth - 1) / RegWidth; } - assert(0 && "Unsupported extended type!"); - return 0; // Not reached + llvm_unreachable("Unsupported extended type!"); } /// ShouldShrinkFPConstant - If true, then instruction selection should @@ -646,7 +644,7 @@ class TargetLowering { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'NonScalarIntSafe' is true, that means it's safe to return a + /// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -654,7 +652,7 @@ class TargetLowering { /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, unsigned /*SrcAlign*/, - bool /*NonScalarIntSafe*/, + bool /*IsZeroVal*/, bool /*MemcpyStrSrc*/, MachineFunction &/*MF*/) const { return MVT::Other; @@ -679,10 +677,10 @@ class TargetLowering { return StackPointerRegisterToSaveRestore; } - /// getExceptionAddressRegister - If a physical register, this returns + /// getExceptionPointerRegister - If a physical register, this returns /// the register that receives the exception address on entry to a landing /// pad. - unsigned getExceptionAddressRegister() const { + unsigned getExceptionPointerRegister() const { return ExceptionPointerRegister; } @@ -772,8 +770,7 @@ class TargetLowering { LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, MCContext &/*Ctx*/) const { - assert(0 && "Need to implement this hook if target has custom JTIs"); - return 0; + llvm_unreachable("Need to implement this hook if target has custom JTIs"); } /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC @@ -865,7 +862,6 @@ class TargetLowering { /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -1035,7 +1031,7 @@ class TargetLowering { /// addRegisterClass - Add the specified register class as an available /// regclass for the specified value type. This indicates the selector can /// handle values of that class natively. 
- void addRegisterClass(EVT VT, TargetRegisterClass *RC) { + void addRegisterClass(EVT VT, const TargetRegisterClass *RC) { assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); AvailableRegClasses.push_back(std::make_pair(VT, RC)); RegClassForVT[VT.getSimpleVT().SimpleTy] = RC; @@ -1141,26 +1137,28 @@ class TargetLowering { JumpBufAlignment = Align; } - /// setMinFunctionAlignment - Set the target's minimum function alignment. + /// setMinFunctionAlignment - Set the target's minimum function alignment (in + /// log2(bytes)) void setMinFunctionAlignment(unsigned Align) { MinFunctionAlignment = Align; } /// setPrefFunctionAlignment - Set the target's preferred function alignment. /// This should be set if there is a performance benefit to - /// higher-than-minimum alignment + /// higher-than-minimum alignment (in log2(bytes)) void setPrefFunctionAlignment(unsigned Align) { PrefFunctionAlignment = Align; } /// setPrefLoopAlignment - Set the target's preferred loop alignment. Default /// alignment is zero, it means the target does not care about loop alignment. + /// The alignment is specified in log2(bytes). void setPrefLoopAlignment(unsigned Align) { PrefLoopAlignment = Align; } /// setMinStackArgumentAlignment - Set the minimum stack alignment of an - /// argument. + /// argument (in log2(bytes)). void setMinStackArgumentAlignment(unsigned Align) { MinStackArgumentAlignment = Align; } @@ -1196,8 +1194,7 @@ class TargetLowering { const SmallVectorImpl &/*Ins*/, DebugLoc /*dl*/, SelectionDAG &/*DAG*/, SmallVectorImpl &/*InVals*/) const { - assert(0 && "Not Implemented"); - return SDValue(); // this is here to silence compiler errors + llvm_unreachable("Not Implemented"); } /// LowerCallTo - This function lowers an abstract call to a function into an @@ -1224,7 +1221,8 @@ class TargetLowering { LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, + bool doesNotRet, bool isReturnValueUsed, + SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const; /// LowerCall - This hook must be implemented to lower calls into the @@ -1236,14 +1234,13 @@ class TargetLowering { virtual SDValue LowerCall(SDValue /*Chain*/, SDValue /*Callee*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, - bool &/*isTailCall*/, + bool /*doesNotRet*/, bool &/*isTailCall*/, const SmallVectorImpl &/*Outs*/, const SmallVectorImpl &/*OutVals*/, const SmallVectorImpl &/*Ins*/, DebugLoc /*dl*/, SelectionDAG &/*DAG*/, SmallVectorImpl &/*InVals*/) const { - assert(0 && "Not Implemented"); - return SDValue(); // this is here to silence compiler errors + llvm_unreachable("Not Implemented"); } /// HandleByVal - Target-specific cleanup for formal ByVal parameters. @@ -1273,14 +1270,15 @@ class TargetLowering { const SmallVectorImpl &/*Outs*/, const SmallVectorImpl &/*OutVals*/, DebugLoc /*dl*/, SelectionDAG &/*DAG*/) const { - assert(0 && "Not Implemented"); - return SDValue(); // this is here to silence compiler errors + llvm_unreachable("Not Implemented"); } /// isUsedByReturnOnly - Return true if result of the specified node is used - /// by a return node only. This is used to determine whether it is possible + /// by a return node only. It also compute and return the input chain for the + /// tail call. + /// This is used to determine whether it is possible /// to codegen a libcall as tail call at legalization time. 
- virtual bool isUsedByReturnOnly(SDNode *) const { + virtual bool isUsedByReturnOnly(SDNode *, SDValue &Chain) const { return false; } @@ -1339,7 +1337,7 @@ class TargetLowering { virtual void ReplaceNodeResults(SDNode * /*N*/, SmallVectorImpl &/*Results*/, SelectionDAG &/*DAG*/) const { - assert(0 && "ReplaceNodeResults not implemented for this target!"); + llvm_unreachable("ReplaceNodeResults not implemented for this target!"); } /// getTargetNodeName() - This method returns the name of a target specific @@ -1531,6 +1529,17 @@ class TargetLowering { AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {} }; + /// GetAddrModeArguments - CodeGenPrepare sinks address calculations into the + /// same BB as Load/Store instructions reading the address. This allows as + /// much computation as possible to be done in the address mode for that + /// operand. This hook lets targets also pass back when this should be done + /// on intrinsics which load/store. + virtual bool GetAddrModeArguments(IntrinsicInst *I, + SmallVectorImpl &Ops, + Type *&AccessTy) const { + return false; + } + /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. /// The type may be VoidTy, in which case only return true if the addressing @@ -1581,6 +1590,18 @@ class TargetLowering { return false; } + /// isFNegFree - Return true if an fneg operation is free to the point where + /// it is never worthwhile to replace it with a bitwise operation. + virtual bool isFNegFree(EVT) const { + return false; + } + + /// isFAbsFree - Return true if an fneg operation is free to the point where + /// it is never worthwhile to replace it with a bitwise operation. + virtual bool isFAbsFree(EVT) const { + return false; + } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. @@ -1593,9 +1614,9 @@ class TargetLowering { // SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, SelectionDAG &DAG) const; - SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, + SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector* Created) const; - SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, + SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, std::vector* Created) const; @@ -1753,7 +1774,7 @@ class TargetLowering { /// RegClassForVT - This indicates the default register class to use for /// each ValueType the target supports natively. - TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; + const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; EVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; @@ -1925,12 +1946,9 @@ class TargetLowering { // Vectors with illegal element types are expanded. 
EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2); return LegalizeKind(TypeSplitVector, NVT); - - assert(false && "Unable to handle this kind of vector type"); - return LegalizeKind(TypeLegal, VT); } - std::vector > AvailableRegClasses; + std::vector > AvailableRegClasses; /// TargetDAGCombineArray - Targets can specify ISD nodes that they would /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(), diff --git a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 7d06cec0a4e1..d631f58aab74 100644 --- a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -15,18 +15,17 @@ #ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H #define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H -#include "llvm/ADT/StringRef.h" +#include "llvm/Module.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/SectionKind.h" +#include "llvm/ADT/ArrayRef.h" namespace llvm { class MachineModuleInfo; class Mangler; - class MCAsmInfo; class MCContext; class MCExpr; class MCSection; - class MCSectionMachO; class MCSymbol; class MCStreamer; class GlobalValue; @@ -53,7 +52,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { virtual void emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, const MCSymbol *Sym) const; - + + /// emitModuleFlags - Emit the module flags that the platform cares about. + virtual void emitModuleFlags(MCStreamer &, + ArrayRef, + Mangler *, const TargetMachine &) const { + } + /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively /// decide not to emit the UsedDirective for some symbols in llvm.used. /// FIXME: REMOVE this (rdar://7071300) @@ -86,9 +91,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { const TargetMachine &TM) const { return SectionForGlobal(GV, getKindForGlobal(GV, TM), Mang, TM); } - - - + /// getExplicitSectionGlobal - Targets should implement this method to assign /// a section to globals with an explicit section specfied. The /// implementation of this method can assume that GV->hasSection() is true. 
@@ -121,7 +124,18 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { const MCExpr * getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const; - + + virtual const MCSection * + getStaticCtorSection(unsigned Priority = 65535) const { + (void)Priority; + return StaticCtorSection; + } + virtual const MCSection * + getStaticDtorSection(unsigned Priority = 65535) const { + (void)Priority; + return StaticDtorSection; + } + protected: virtual const MCSection * SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, diff --git a/contrib/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm/include/llvm/Target/TargetMachine.h index 8a8d14229055..1a0560478a41 100644 --- a/contrib/llvm/include/llvm/Target/TargetMachine.h +++ b/contrib/llvm/include/llvm/Target/TargetMachine.h @@ -14,7 +14,8 @@ #ifndef LLVM_TARGET_TARGETMACHINE_H #define LLVM_TARGET_TARGETMACHINE_H -#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/StringRef.h" #include #include @@ -23,11 +24,10 @@ namespace llvm { class InstrItineraryData; class JITCodeEmitter; +class GlobalValue; class MCAsmInfo; class MCCodeGenInfo; class MCContext; -class Pass; -class PassManager; class PassManagerBase; class Target; class TargetData; @@ -37,32 +37,13 @@ class TargetInstrInfo; class TargetIntrinsicInfo; class TargetJITInfo; class TargetLowering; +class TargetPassConfig; class TargetRegisterInfo; class TargetSelectionDAGInfo; class TargetSubtargetInfo; class formatted_raw_ostream; class raw_ostream; -// Code generation optimization level. -namespace CodeGenOpt { - enum Level { - None, // -O0 - Less, // -O1 - Default, // -O2, -Os - Aggressive // -O3 - }; -} - -namespace Sched { - enum Preference { - None, // No preference - Latency, // Scheduling for shortest total latency. - RegPressure, // Scheduling for lowest register pressure. - Hybrid, // Scheduling for both latency and register pressure. - ILP // Scheduling for ILP in low register pressure mode. - }; -} - //===----------------------------------------------------------------------===// /// /// TargetMachine - Primary interface to the complete machine description for @@ -74,7 +55,7 @@ class TargetMachine { void operator=(const TargetMachine &); // DO NOT IMPLEMENT protected: // Can only create subclasses. TargetMachine(const Target &T, StringRef TargetTriple, - StringRef CPU, StringRef FS); + StringRef CPU, StringRef FS, const TargetOptions &Options); /// getSubtargetImpl - virtual method implemented by subclasses that returns /// a reference to that target's TargetSubtargetInfo-derived member variable. @@ -101,6 +82,7 @@ class TargetMachine { unsigned MCSaveTempLabels : 1; unsigned MCUseLoc : 1; unsigned MCUseCFI : 1; + unsigned MCUseDwarfDirectory : 1; public: virtual ~TargetMachine(); @@ -111,6 +93,8 @@ class TargetMachine { const StringRef getTargetCPU() const { return TargetCPU; } const StringRef getTargetFeatureString() const { return TargetFS; } + TargetOptions Options; + // Interfaces to the major aspects of target machine information: // -- Instruction opcode and operand information // -- Pipelines and scheduling information @@ -196,6 +180,14 @@ class TargetMachine { /// setMCUseCFI - Set whether all we should use dwarf's .cfi_* directives. void setMCUseCFI(bool Value) { MCUseCFI = Value; } + /// hasMCUseDwarfDirectory - Check whether we should use .file directives with + /// explicit directories. 
+ bool hasMCUseDwarfDirectory() const { return MCUseDwarfDirectory; } + + /// setMCUseDwarfDirectory - Set whether all we should use .file directives + /// with explicit directories. + void setMCUseDwarfDirectory(bool Value) { MCUseDwarfDirectory = Value; } + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. Reloc::Model getRelocationModel() const; @@ -204,6 +196,18 @@ class TargetMachine { /// medium, large, and target default. CodeModel::Model getCodeModel() const; + /// getTLSModel - Returns the TLS model which should be used for the given + /// global variable. + TLSModel::Model getTLSModel(const GlobalValue *GV) const; + + /// getOptLevel - Returns the optimization level: None, Less, + /// Default, or Aggressive. + CodeGenOpt::Level getOptLevel() const; + + void setFastISel(bool Enable) { Options.EnableFastISel = Enable; } + + bool shouldPrintMachineCode() const { return Options.PrintMachineCode; } + /// getAsmVerbosityDefault - Returns the default value of asm verbosity. /// static bool getAsmVerbosityDefault(); @@ -236,10 +240,6 @@ class TargetMachine { CGFT_Null // Do not emit any output. }; - /// getEnableTailMergeDefault - the default setting for -enable-tail-merge - /// on this target. User flag overrides. - virtual bool getEnableTailMergeDefault() const { return true; } - /// addPassesToEmitFile - Add passes to the specified pass manager to get the /// specified file emitted. Typically this will involve several steps of code /// generation. This method should return true if emission of this file type @@ -247,8 +247,7 @@ class TargetMachine { virtual bool addPassesToEmitFile(PassManagerBase &, formatted_raw_ostream &, CodeGenFileType, - CodeGenOpt::Level, - bool = true) { + bool /*DisableVerify*/ = true) { return true; } @@ -260,8 +259,7 @@ class TargetMachine { /// virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, - CodeGenOpt::Level, - bool = true) { + bool /*DisableVerify*/ = true) { return true; } @@ -273,8 +271,7 @@ class TargetMachine { virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, - CodeGenOpt::Level, - bool = true) { + bool /*DisableVerify*/ = true) { return true; } }; @@ -285,25 +282,21 @@ class TargetMachine { class LLVMTargetMachine : public TargetMachine { protected: // Can only create subclasses. LLVMTargetMachine(const Target &T, StringRef TargetTriple, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); - -private: - /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for - /// both emitting to assembly files or machine code output. - /// - bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, - bool DisableVerify, MCContext *&OutCtx); + StringRef CPU, StringRef FS, TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); public: + /// createPassConfig - Create a pass configuration object to be used by + /// addPassToEmitX methods for generating a pipeline of CodeGen passes. + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + /// addPassesToEmitFile - Add passes to the specified pass manager to get the /// specified file emitted. Typically this will involve several steps of code - /// generation. If OptLevel is None, the code generator should emit code as - /// fast as possible, though the generated code may be less efficient. + /// generation. 
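getTLSModel now lives on TargetMachine, so the chosen TLS model can depend on the machine's relocation model and on properties of the global variable. The mapping below is only a plausible illustration of such a policy; the function name and the exact rules are assumptions, not LLVM's implementation:

#include <iostream>

enum class RelocModel { Static, PIC };
enum class TLSModel { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

// Illustrative selection only: PIC code gets the dynamic models, non-PIC the
// exec models, and locally-defined globals get the "local" variant of each.
static TLSModel pickTLSModel(RelocModel Reloc, bool DefinedLocally) {
  if (Reloc == RelocModel::PIC)
    return DefinedLocally ? TLSModel::LocalDynamic : TLSModel::GeneralDynamic;
  return DefinedLocally ? TLSModel::LocalExec : TLSModel::InitialExec;
}

int main() {
  std::cout << static_cast<int>(pickTLSModel(RelocModel::PIC, true)) << "\n"
            << static_cast<int>(pickTLSModel(RelocModel::Static, false)) << "\n";
}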
virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level, bool DisableVerify = true); /// addPassesToEmitMachineCode - Add passes to the specified pass manager to @@ -314,7 +307,6 @@ class LLVMTargetMachine : public TargetMachine { /// virtual bool addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &MCE, - CodeGenOpt::Level, bool DisableVerify = true); /// addPassesToEmitMC - Add passes to the specified pass manager to get @@ -325,65 +317,15 @@ class LLVMTargetMachine : public TargetMachine { virtual bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &OS, - CodeGenOpt::Level OptLevel, bool DisableVerify = true); - /// Target-Independent Code Generator Pass Configuration Options. - - /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM - /// passes (which are run just before instruction selector). - virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) { - return true; - } - - /// addInstSelector - This method should install an instruction selector pass, - /// which converts from LLVM code to machine instructions. - virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) { - return true; - } - - /// addPreRegAlloc - This method may be implemented by targets that want to - /// run passes immediately before register allocation. This should return - /// true if -print-machineinstrs should print after these passes. - virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level) { - return false; - } - - /// addPostRegAlloc - This method may be implemented by targets that want - /// to run passes after register allocation but before prolog-epilog - /// insertion. This should return true if -print-machineinstrs should print - /// after these passes. - virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level) { - return false; - } - - /// addPreSched2 - This method may be implemented by targets that want to - /// run passes after prolog-epilog insertion and before the second instruction - /// scheduling pass. This should return true if -print-machineinstrs should - /// print after these passes. - virtual bool addPreSched2(PassManagerBase &, CodeGenOpt::Level) { - return false; - } - - /// addPreEmitPass - This pass may be implemented by targets that want to run - /// passes immediately before machine code is emitted. This should return - /// true if -print-machineinstrs should print out the code after the passes. - virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level) { - return false; - } - - /// addCodeEmitter - This pass should be overridden by the target to add a /// code emitter, if supported. If this is not supported, 'true' should be /// returned. - virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level, + virtual bool addCodeEmitter(PassManagerBase &, JITCodeEmitter &) { return true; } - - /// getEnableTailMergeDefault - the default setting for -enable-tail-merge - /// on this target. User flag overrides. - virtual bool getEnableTailMergeDefault() const { return true; } }; } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Target/TargetOpcodes.h b/contrib/llvm/include/llvm/Target/TargetOpcodes.h index 37f7b2fb8db5..f0b181e345b7 100644 --- a/contrib/llvm/include/llvm/Target/TargetOpcodes.h +++ b/contrib/llvm/include/llvm/Target/TargetOpcodes.h @@ -82,7 +82,12 @@ namespace TargetOpcode { /// COPY - Target-independent register copy. 
This instruction can also be /// used to copy between subregisters of virtual registers. - COPY = 13 + COPY = 13, + + /// BUNDLE - This instruction represents an instruction bundle. Instructions + /// which immediately follow a BUNDLE instruction which are marked with + /// 'InsideBundle' flag are inside the bundle. + BUNDLE }; } // end namespace TargetOpcode } // end namespace llvm diff --git a/contrib/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm/include/llvm/Target/TargetOptions.h index e07e8c1cea08..12a275731536 100644 --- a/contrib/llvm/include/llvm/Target/TargetOptions.h +++ b/contrib/llvm/include/llvm/Target/TargetOptions.h @@ -15,151 +15,177 @@ #ifndef LLVM_TARGET_TARGETOPTIONS_H #define LLVM_TARGET_TARGETOPTIONS_H +#include + namespace llvm { class MachineFunction; + class StringRef; // Possible float ABI settings. Used with FloatABIType in TargetOptions.h. namespace FloatABI { enum ABIType { - Default, // Target-specific (either soft of hard depending on triple, etc). + Default, // Target-specific (either soft or hard depending on triple, etc). Soft, // Soft float. Hard // Hard float. }; } - - /// PrintMachineCode - This flag is enabled when the -print-machineinstrs - /// option is specified on the command line, and should enable debugging - /// output from the code generator. - extern bool PrintMachineCode; - /// NoFramePointerElim - This flag is enabled when the -disable-fp-elim is - /// specified on the command line. If the target supports the frame pointer - /// elimination optimization, this option should disable it. - extern bool NoFramePointerElim; + class TargetOptions { + public: + TargetOptions() + : PrintMachineCode(false), NoFramePointerElim(false), + NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false), + NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false), + NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false), + UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false), + JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false), + GuaranteedTailCallOpt(false), DisableTailCalls(false), + StackAlignmentOverride(0), RealignStack(true), + DisableJumpTables(false), EnableFastISel(false), + PositionIndependentExecutable(false), EnableSegmentedStacks(false), + TrapFuncName(""), FloatABIType(FloatABI::Default) + {} - /// NoFramePointerElimNonLeaf - This flag is enabled when the - /// -disable-non-leaf-fp-elim is specified on the command line. If the target - /// supports the frame pointer elimination optimization, this option should - /// disable it for non-leaf functions. - extern bool NoFramePointerElimNonLeaf; + /// PrintMachineCode - This flag is enabled when the -print-machineinstrs + /// option is specified on the command line, and should enable debugging + /// output from the code generator. + unsigned PrintMachineCode : 1; - /// DisableFramePointerElim - This returns true if frame pointer elimination - /// optimization should be disabled for the given machine function. - extern bool DisableFramePointerElim(const MachineFunction &MF); + /// NoFramePointerElim - This flag is enabled when the -disable-fp-elim is + /// specified on the command line. If the target supports the frame pointer + /// elimination optimization, this option should disable it. + unsigned NoFramePointerElim : 1; - /// LessPreciseFPMAD - This flag is enabled when the - /// -enable-fp-mad is specified on the command line. 
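The BUNDLE pseudo-instruction introduced in the TargetOpcodes.h hunk above marks a bundle whose members are the immediately following instructions carrying an 'InsideBundle' flag. A toy, self-contained sketch of walking such a sequence and grouping the flagged instructions (the ToyInstr type and its flags are illustrative only):

#include <iostream>
#include <string>
#include <vector>

// Toy model of instruction bundling: a BUNDLE marker followed by
// instructions flagged as being inside the bundle.
struct ToyInstr {
  std::string Name;
  bool IsBundleMarker;
  bool InsideBundle;
};

int main() {
  std::vector<ToyInstr> MBB = {
      {"add", false, false},
      {"BUNDLE", true, false},
      {"mul", false, true},   // bundled together with the following load
      {"load", false, true},
      {"ret", false, false},
  };

  for (size_t i = 0; i < MBB.size(); ++i) {
    if (!MBB[i].IsBundleMarker) {
      if (!MBB[i].InsideBundle)
        std::cout << MBB[i].Name << "\n";
      continue;
    }
    // Collect every instruction flagged InsideBundle right after the marker.
    std::cout << "{";
    size_t j = i + 1;
    for (; j < MBB.size() && MBB[j].InsideBundle; ++j)
      std::cout << " " << MBB[j].Name;
    std::cout << " }\n";
    i = j - 1;
  }
}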
When this flag is off - /// (the default), the code generator is not allowed to generate mad - /// (multiply add) if the result is "less precise" than doing those operations - /// individually. - extern bool LessPreciseFPMADOption; - extern bool LessPreciseFPMAD(); + /// NoFramePointerElimNonLeaf - This flag is enabled when the + /// -disable-non-leaf-fp-elim is specified on the command line. If the + /// target supports the frame pointer elimination optimization, this option + /// should disable it for non-leaf functions. + unsigned NoFramePointerElimNonLeaf : 1; - /// NoExcessFPPrecision - This flag is enabled when the - /// -disable-excess-fp-precision flag is specified on the command line. When - /// this flag is off (the default), the code generator is allowed to produce - /// results that are "more precise" than IEEE allows. This includes use of - /// FMA-like operations and use of the X86 FP registers without rounding all - /// over the place. - extern bool NoExcessFPPrecision; + /// DisableFramePointerElim - This returns true if frame pointer elimination + /// optimization should be disabled for the given machine function. + bool DisableFramePointerElim(const MachineFunction &MF) const; - /// UnsafeFPMath - This flag is enabled when the - /// -enable-unsafe-fp-math flag is specified on the command line. When - /// this flag is off (the default), the code generator is not allowed to - /// produce results that are "less precise" than IEEE allows. This includes - /// use of X86 instructions like FSIN and FCOS instead of libcalls. - /// UnsafeFPMath implies LessPreciseFPMAD. - extern bool UnsafeFPMath; + /// LessPreciseFPMAD - This flag is enabled when the + /// -enable-fp-mad is specified on the command line. When this flag is off + /// (the default), the code generator is not allowed to generate mad + /// (multiply add) if the result is "less precise" than doing those + /// operations individually. + unsigned LessPreciseFPMADOption : 1; + bool LessPreciseFPMAD() const; - /// NoInfsFPMath - This flag is enabled when the - /// -enable-no-infs-fp-math flag is specified on the command line. When - /// this flag is off (the default), the code generator is not allowed to - /// assume the FP arithmetic arguments and results are never +-Infs. - extern bool NoInfsFPMath; + /// NoExcessFPPrecision - This flag is enabled when the + /// -disable-excess-fp-precision flag is specified on the command line. + /// When this flag is off (the default), the code generator is allowed to + /// produce results that are "more precise" than IEEE allows. This includes + /// use of FMA-like operations and use of the X86 FP registers without + /// rounding all over the place. + unsigned NoExcessFPPrecision : 1; - /// NoNaNsFPMath - This flag is enabled when the - /// -enable-no-nans-fp-math flag is specified on the command line. When - /// this flag is off (the default), the code generator is not allowed to - /// assume the FP arithmetic arguments and results are never NaNs. - extern bool NoNaNsFPMath; + /// UnsafeFPMath - This flag is enabled when the + /// -enable-unsafe-fp-math flag is specified on the command line. When + /// this flag is off (the default), the code generator is not allowed to + /// produce results that are "less precise" than IEEE allows. This includes + /// use of X86 instructions like FSIN and FCOS instead of libcalls. + /// UnsafeFPMath implies LessPreciseFPMAD. 
+ unsigned UnsafeFPMath : 1; - /// HonorSignDependentRoundingFPMath - This returns true when the - /// -enable-sign-dependent-rounding-fp-math is specified. If this returns - /// false (the default), the code generator is allowed to assume that the - /// rounding behavior is the default (round-to-zero for all floating point to - /// integer conversions, and round-to-nearest for all other arithmetic - /// truncations). If this is enabled (set to true), the code generator must - /// assume that the rounding mode may dynamically change. - extern bool HonorSignDependentRoundingFPMathOption; - extern bool HonorSignDependentRoundingFPMath(); - - /// UseSoftFloat - This flag is enabled when the -soft-float flag is specified - /// on the command line. When this flag is on, the code generator will - /// generate libcalls to the software floating point library instead of - /// target FP instructions. - extern bool UseSoftFloat; + /// NoInfsFPMath - This flag is enabled when the + /// -enable-no-infs-fp-math flag is specified on the command line. When + /// this flag is off (the default), the code generator is not allowed to + /// assume the FP arithmetic arguments and results are never +-Infs. + unsigned NoInfsFPMath : 1; - /// FloatABIType - This setting is set by -float-abi=xxx option is specfied - /// on the command line. This setting may either be Default, Soft, or Hard. - /// Default selects the target's default behavior. Soft selects the ABI for - /// UseSoftFloat, but does not inidcate that FP hardware may not be used. - /// Such a combination is unfortunately popular (e.g. arm-apple-darwin). - /// Hard presumes that the normal FP ABI is used. - extern FloatABI::ABIType FloatABIType; + /// NoNaNsFPMath - This flag is enabled when the + /// -enable-no-nans-fp-math flag is specified on the command line. When + /// this flag is off (the default), the code generator is not allowed to + /// assume the FP arithmetic arguments and results are never NaNs. + unsigned NoNaNsFPMath : 1; - /// NoZerosInBSS - By default some codegens place zero-initialized data to - /// .bss section. This flag disables such behaviour (necessary, e.g. for - /// crt*.o compiling). - extern bool NoZerosInBSS; + /// HonorSignDependentRoundingFPMath - This returns true when the + /// -enable-sign-dependent-rounding-fp-math is specified. If this returns + /// false (the default), the code generator is allowed to assume that the + /// rounding behavior is the default (round-to-zero for all floating point + /// to integer conversions, and round-to-nearest for all other arithmetic + /// truncations). If this is enabled (set to true), the code generator must + /// assume that the rounding mode may dynamically change. + unsigned HonorSignDependentRoundingFPMathOption : 1; + bool HonorSignDependentRoundingFPMath() const; - /// JITExceptionHandling - This flag indicates that the JIT should emit - /// exception handling information. - extern bool JITExceptionHandling; + /// UseSoftFloat - This flag is enabled when the -soft-float flag is + /// specified on the command line. When this flag is on, the code generator + /// will generate libcalls to the software floating point library instead of + /// target FP instructions. + unsigned UseSoftFloat : 1; - /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit - /// debug information and notify a debugger about it. - extern bool JITEmitDebugInfo; + /// NoZerosInBSS - By default some codegens place zero-initialized data to + /// .bss section. 
This flag disables such behaviour (necessary, e.g. for + /// crt*.o compiling). + unsigned NoZerosInBSS : 1; - /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write - /// the object files generated by the JITEmitDebugInfo flag to disk. This - /// flag is hidden and is only for debugging the debug info. - extern bool JITEmitDebugInfoToDisk; + /// JITExceptionHandling - This flag indicates that the JIT should emit + /// exception handling information. + unsigned JITExceptionHandling : 1; - /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is - /// specified on the commandline. When the flag is on, participating targets - /// will perform tail call optimization on all calls which use the fastcc - /// calling convention and which satisfy certain target-independent - /// criteria (being at the end of a function, having the same return type - /// as their parent function, etc.), using an alternate ABI if necessary. - extern bool GuaranteedTailCallOpt; + /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit + /// debug information and notify a debugger about it. + unsigned JITEmitDebugInfo : 1; - /// StackAlignmentOverride - Override default stack alignment for target. - extern unsigned StackAlignmentOverride; + /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write + /// the object files generated by the JITEmitDebugInfo flag to disk. This + /// flag is hidden and is only for debugging the debug info. + unsigned JITEmitDebugInfoToDisk : 1; - /// RealignStack - This flag indicates whether the stack should be - /// automatically realigned, if needed. - extern bool RealignStack; + /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is + /// specified on the commandline. When the flag is on, participating targets + /// will perform tail call optimization on all calls which use the fastcc + /// calling convention and which satisfy certain target-independent + /// criteria (being at the end of a function, having the same return type + /// as their parent function, etc.), using an alternate ABI if necessary. + unsigned GuaranteedTailCallOpt : 1; - /// DisableJumpTables - This flag indicates jump tables should not be - /// generated. - extern bool DisableJumpTables; + /// DisableTailCalls - This flag controls whether we will use tail calls. + /// Disabling them may be useful to maintain a correct call stack. + unsigned DisableTailCalls : 1; - /// EnableFastISel - This flag enables fast-path instruction selection - /// which trades away generated code quality in favor of reducing - /// compile time. - extern bool EnableFastISel; - - /// StrongPHIElim - This flag enables more aggressive PHI elimination - /// wth earlier copy coalescing. - extern bool StrongPHIElim; + /// StackAlignmentOverride - Override default stack alignment for target. + unsigned StackAlignmentOverride; - /// getTrapFunctionName - If this returns a non-empty string, this means isel - /// should lower Intrinsic::trap to a call to the specified function name - /// instead of an ISD::TRAP node. - extern StringRef getTrapFunctionName(); + /// RealignStack - This flag indicates whether the stack should be + /// automatically realigned, if needed. + unsigned RealignStack : 1; - extern bool EnableSegmentedStacks; + /// DisableJumpTables - This flag indicates jump tables should not be + /// generated. 
+ unsigned DisableJumpTables : 1; + /// EnableFastISel - This flag enables fast-path instruction selection + /// which trades away generated code quality in favor of reducing + /// compile time. + unsigned EnableFastISel : 1; + + /// PositionIndependentExecutable - This flag indicates whether the code + /// will eventually be linked into a single executable, despite the PIC + /// relocation model being in use. It's value is undefined (and irrelevant) + /// if the relocation model is anything other than PIC. + unsigned PositionIndependentExecutable : 1; + + unsigned EnableSegmentedStacks : 1; + + /// getTrapFunctionName - If this returns a non-empty string, this means + /// isel should lower Intrinsic::trap to a call to the specified function + /// name instead of an ISD::TRAP node. + std::string TrapFuncName; + StringRef getTrapFunctionName() const; + + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied + /// on the command line. This setting may either be Default, Soft, or Hard. + /// Default selects the target's default behavior. Soft selects the ABI for + /// UseSoftFloat, but does not indicate that FP hardware may not be used. + /// Such a combination is unfortunately popular (e.g. arm-apple-darwin). + /// Hard presumes that the normal FP ABI is used. + FloatABI::ABIType FloatABIType; + }; } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h index 682aa50736db..7d8a46b49ac9 100644 --- a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/CallingConv.h" #include #include @@ -33,25 +34,18 @@ class raw_ostream; class TargetRegisterClass { public: - typedef const unsigned* iterator; - typedef const unsigned* const_iterator; - typedef const EVT* vt_iterator; + typedef const uint16_t* iterator; + typedef const uint16_t* const_iterator; + typedef const MVT::SimpleValueType* vt_iterator; typedef const TargetRegisterClass* const * sc_iterator; -private: + + // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; const vt_iterator VTs; const unsigned *SubClassMask; const sc_iterator SuperClasses; const sc_iterator SuperRegClasses; -public: - TargetRegisterClass(const MCRegisterClass *MC, const EVT *vts, - const unsigned *subcm, - const TargetRegisterClass * const *supcs, - const TargetRegisterClass * const *superregcs) - : MC(MC), VTs(vts), SubClassMask(subcm), SuperClasses(supcs), - SuperRegClasses(superregcs) {} - - virtual ~TargetRegisterClass() {} // Allow subclasses + ArrayRef (*OrderFunc)(const MachineFunction&); /// getID() - Return the register class ID number. /// @@ -108,7 +102,7 @@ class TargetRegisterClass { /// bool hasType(EVT vt) const { for(int i = 0; VTs[i] != MVT::Other; ++i) - if (VTs[i] == vt) + if (EVT(VTs[i]) == vt) return true; return false; } @@ -165,7 +159,7 @@ class TargetRegisterClass { /// getSubClassMask - Returns a bit vector of subclasses, including this one. /// The vector is indexed by class IDs, see hasSubClassEq() above for how to /// use it. - const unsigned *getSubClassMask() const { + const uint32_t *getSubClassMask() const { return SubClassMask; } @@ -196,9 +190,8 @@ class TargetRegisterClass { /// /// By default, this method returns all registers in the class. 
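As a small illustration of the slimmed-down TargetRegisterClass above (the helper name is hypothetical; the element type follows the new const uint16_t* iterators):

#include "llvm/Target/TargetRegisterInfo.h"

static unsigned numAllocOrderRegs(const llvm::TargetRegisterClass *RC,
                                  const llvm::MachineFunction &MF) {
  // Defaults to begin()..end(); a target-supplied OrderFunc may return a
  // reordered list that can still contain reserved registers.
  llvm::ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
  return Order.size();
}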
/// - virtual - ArrayRef getRawAllocationOrder(const MachineFunction &MF) const { - return makeArrayRef(begin(), getNumRegs()); + ArrayRef getRawAllocationOrder(const MachineFunction &MF) const { + return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs()); } }; @@ -209,6 +202,13 @@ struct TargetRegisterInfoDesc { bool inAllocatableClass; // Register belongs to an allocatable regclass. }; +/// Each TargetRegisterClass has a per register weight, and weight +/// limit which must be less than the limits of its pressure sets. +struct RegClassWeight { + unsigned RegWeigt; + unsigned WeightLimit; +}; + /// TargetRegisterInfo base class - We assume that the target defines a static /// array of TargetRegisterDesc objects that represent all of the machine /// registers that the target has. As such, we simply have to track a pointer @@ -332,7 +332,7 @@ class TargetRegisterInfo : public MCRegisterInfo { if (regA == regB) return true; if (isVirtualRegister(regA) || isVirtualRegister(regB)) return false; - for (const unsigned *regList = getOverlaps(regA)+1; *regList; ++regList) { + for (const uint16_t *regList = getOverlaps(regA)+1; *regList; ++regList) { if (*regList == regB) return true; } return false; @@ -347,7 +347,7 @@ class TargetRegisterInfo : public MCRegisterInfo { /// isSuperRegister - Returns true if regB is a super-register of regA. /// bool isSuperRegister(unsigned regA, unsigned regB) const { - for (const unsigned *regList = getSuperRegisters(regA); *regList;++regList){ + for (const uint16_t *regList = getSuperRegisters(regA); *regList;++regList){ if (*regList == regB) return true; } return false; @@ -356,10 +356,33 @@ class TargetRegisterInfo : public MCRegisterInfo { /// getCalleeSavedRegs - Return a null-terminated list of all of the /// callee saved registers on this target. The register should be in the /// order of desired callee-save stack frame offset. The first register is - /// closed to the incoming stack pointer if stack grows down, and vice versa. - virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF = 0) + /// closest to the incoming stack pointer if stack grows down, and vice versa. + /// + virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF = 0) const = 0; + /// getCallPreservedMask - Return a mask of call-preserved registers for the + /// given calling convention on the current sub-target. The mask should + /// include all call-preserved aliases. This is used by the register + /// allocator to determine which registers can be live across a call. + /// + /// The mask is an array containing (TRI::getNumRegs()+31)/32 entries. + /// A set bit indicates that all bits of the corresponding register are + /// preserved across the function call. The bit mask is expected to be + /// sub-register complete, i.e. if A is preserved, so are all its + /// sub-registers. + /// + /// Bits are numbered from the LSB, so the bit for physical register Reg can + /// be found as (Mask[Reg / 32] >> Reg % 32) & 1. + /// + /// A NULL pointer means that no register mask will be used, and call + /// instructions should use implicit-def operands to indicate call clobbered + /// registers. + /// + virtual const uint32_t *getCallPreservedMask(CallingConv::ID) const { + // The default mask clobbers everything. All targets should override. 
+ return 0; + } /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses @@ -367,24 +390,11 @@ class TargetRegisterInfo : public MCRegisterInfo { /// used by register scavenger to determine what registers are free. virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; - /// getSubReg - Returns the physical register number of sub-register "Index" - /// for physical register RegNo. Return zero if the sub-register does not - /// exist. - virtual unsigned getSubReg(unsigned RegNo, unsigned Index) const = 0; - - /// getSubRegIndex - For a given register pair, return the sub-register index - /// if the second register is a sub-register of the first. Return zero - /// otherwise. - virtual unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const = 0; - /// getMatchingSuperReg - Return a super-register of the specified register /// Reg so its sub-register of index SubIdx is Reg. unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const { - for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs) - if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR)) - return SR; - return 0; + return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC); } /// canCombineSubRegIndices - Given a register class and a list of @@ -402,11 +412,11 @@ class TargetRegisterInfo : public MCRegisterInfo { /// getMatchingSuperRegClass - Return a subclass of the specified register /// class A so that each register in it has a sub-register of the /// specified sub-register index which is in the specified register class B. + /// + /// TableGen will synthesize missing A sub-classes. virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, unsigned Idx) const { - return 0; - } + const TargetRegisterClass *B, unsigned Idx) const =0; /// getSubClassWithSubReg - Returns the largest legal sub-class of RC that /// supports the sub-register index Idx. @@ -419,6 +429,7 @@ class TargetRegisterInfo : public MCRegisterInfo { /// supported by the full GR32 register class in 64-bit mode, but only by the /// GR32_ABCD regiister class in 32-bit mode. /// + /// TableGen will synthesize missing RC sub-classes. virtual const TargetRegisterClass * getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const =0; @@ -469,8 +480,7 @@ class TargetRegisterInfo : public MCRegisterInfo { /// values. If a target supports multiple different pointer register classes, /// kind specifies which one is indicated. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const { - assert(0 && "Target didn't implement getPointerRegClass!"); - return 0; // Must return a value in order to compile with VS 2005 + llvm_unreachable("Target didn't implement getPointerRegClass!"); } /// getCrossCopyRegClass - Returns a legal register class to copy a register @@ -497,18 +507,37 @@ class TargetRegisterInfo : public MCRegisterInfo { /// getRegPressureLimit - Return the register pressure "high water mark" for /// the specific register class. The scheduler is in high register pressure /// mode (for the specific register class) if it goes over the limit. + /// + /// Note: this is the old register pressure model that relies on a manually + /// specified representative register class per value type. 
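The call-preserved mask described in the previous hunk packs one bit per physical register; a minimal sketch of testing it, using exactly the Mask[Reg / 32] formula documented there (the helper name is hypothetical):

#include "llvm/Support/DataTypes.h"

// True if all bits of physical register Reg are preserved across the call,
// i.e. its bit is set in the mask returned by getCallPreservedMask().
static bool isCallPreserved(const uint32_t *Mask, unsigned Reg) {
  if (!Mask)                    // NULL mask: clobbers everything by default
    return false;
  return (Mask[Reg / 32] >> (Reg % 32)) & 1;
}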
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { return 0; } + /// Get the weight in units of pressure for this register class. + virtual const RegClassWeight &getRegClassWeight( + const TargetRegisterClass *RC) const = 0; + + /// Get the number of dimensions of register pressure. + virtual unsigned getNumRegPressureSets() const = 0; + + /// Get the register unit pressure limit for this dimension. + /// This limit must be adjusted dynamically for reserved registers. + virtual unsigned getRegPressureSetLimit(unsigned Idx) const = 0; + + /// Get the dimensions of register pressure impacted by this register class. + /// Returns a -1 terminated array of pressure set IDs. + virtual const int *getRegClassPressureSets( + const TargetRegisterClass *RC) const = 0; + /// getRawAllocationOrder - Returns the register allocation order for a /// specified register class with a target-dependent hint. The returned list /// may contain reserved registers that cannot be allocated. /// /// Register allocators need only call this function to resolve /// target-dependent hints, but it should work without hinting as well. - virtual ArrayRef + virtual ArrayRef getRawAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, const MachineFunction &MF) const { @@ -607,22 +636,22 @@ class TargetRegisterInfo : public MCRegisterInfo { virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const { - assert(0 && "materializeFrameBaseRegister does not exist on this target"); + llvm_unreachable("materializeFrameBaseRegister does not exist on this " + "target"); } /// resolveFrameIndex - Resolve a frame index operand of an instruction /// to reference the indicated base register plus offset instead. virtual void resolveFrameIndex(MachineBasicBlock::iterator I, unsigned BaseReg, int64_t Offset) const { - assert(0 && "resolveFrameIndex does not exist on this target"); + llvm_unreachable("resolveFrameIndex does not exist on this target"); } /// isFrameOffsetLegal - Determine whether a given offset immediate is /// encodable to resolve a frame index. 
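A sketch of walking the new register-pressure queries declared above; getRegClassPressureSets() returns a -1 terminated array of pressure set IDs (the helper name is hypothetical):

#include "llvm/Target/TargetRegisterInfo.h"

static unsigned numPressureSets(const llvm::TargetRegisterInfo &TRI,
                                const llvm::TargetRegisterClass *RC) {
  unsigned N = 0;
  // Each entry indexes a dimension with its own getRegPressureSetLimit().
  for (const int *PS = TRI.getRegClassPressureSets(RC); *PS != -1; ++PS)
    ++N;
  return N;
}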
virtual bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const { - assert(0 && "isFrameOffsetLegal does not exist on this target"); - return false; // Must return a value in order to compile with VS 2005 + llvm_unreachable("isFrameOffsetLegal does not exist on this target"); } /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog @@ -636,7 +665,8 @@ class TargetRegisterInfo : public MCRegisterInfo { eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - assert(0 && "Call Frame Pseudo Instructions do not exist on this target!"); + llvm_unreachable("Call Frame Pseudo Instructions do not exist on this " + "target!"); } diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td index 612635ea746d..f55cf0e6306c 100644 --- a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -352,6 +352,8 @@ def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>; +def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>; +def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; @@ -655,6 +657,51 @@ def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i32; }]>; +def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i1; +}]>; +def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; +def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::f32; +}]>; +def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::f64; +}]>; + +def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i1; +}]>; +def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i1; +}]>; +def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == 
MVT::i32; +}]>; + // store fragments. def unindexedstore : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{ diff --git a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h index 9556c7ab5abe..fc23b2c6b58d 100644 --- a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h +++ b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h @@ -15,7 +15,7 @@ #define LLVM_TARGET_TARGETSUBTARGETINFO_H #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CodeGen.h" namespace llvm { @@ -39,7 +39,7 @@ class TargetSubtargetInfo : public MCSubtargetInfo { // AntiDepBreakMode - Type of anti-dependence breaking that should // be performed before post-RA scheduling. typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode; - typedef SmallVectorImpl RegClassVector; + typedef SmallVectorImpl RegClassVector; virtual ~TargetSubtargetInfo(); diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h index f9d7f9e6b98a..18176e8fdbb1 100644 --- a/contrib/llvm/include/llvm/Transforms/IPO.h +++ b/contrib/llvm/include/llvm/Transforms/IPO.h @@ -94,6 +94,7 @@ Pass *createFunctionInliningPass(int Threshold); /// createAlwaysInlinerPass - Return a new pass object that inlines only /// functions that are marked as "always_inline". Pass *createAlwaysInlinerPass(); +Pass *createAlwaysInlinerPass(bool InsertLifetime); //===----------------------------------------------------------------------===// /// createPruneEHPass - Return a new pass object which transforms invoke diff --git a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h index 3ac4c591c94f..7c3cfc870156 100644 --- a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h +++ b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h @@ -31,7 +31,7 @@ namespace llvm { /// struct Inliner : public CallGraphSCCPass { explicit Inliner(char &ID); - explicit Inliner(char &ID, int Threshold); + explicit Inliner(char &ID, int Threshold, bool InsertLifetime); /// getAnalysisUsage - For this class, we declare that we require and preserve /// the call graph. If the derived class implements this method, it should @@ -65,28 +65,21 @@ struct Inliner : public CallGraphSCCPass { /// virtual InlineCost getInlineCost(CallSite CS) = 0; - // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a - // higher threshold to determine if the function call should be inlined. + /// removeDeadFunctions - Remove dead functions. /// - virtual float getInlineFudgeFactor(CallSite CS) = 0; + /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag + /// which restricts it to deleting functions with an 'AlwaysInline' + /// attribute. This is useful for the InlineAlways pass that only wants to + /// deal with that subset of the functions. + bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false); - /// resetCachedCostInfo - erase any cached cost data from the derived class. - /// If the derived class has no such data this can be empty. - /// - virtual void resetCachedCostInfo(Function* Caller) = 0; - - /// growCachedCostInfo - update the cached cost info for Caller after Callee - /// has been inlined. - virtual void growCachedCostInfo(Function *Caller, Function *Callee) = 0; - - /// removeDeadFunctions - Remove dead functions that are not included in - /// DNR (Do Not Remove) list. 
- bool removeDeadFunctions(CallGraph &CG, - SmallPtrSet *DNR = NULL); private: // InlineThreshold - Cache the value here for easy access. unsigned InlineThreshold; + // InsertLifetime - Insert @llvm.lifetime intrinsics. + bool InsertLifetime; + /// shouldInline - Return true if the inliner should attempt to /// inline at the given CallSite. bool shouldInline(CallSite CS); diff --git a/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index cc74e7fefe16..47ce90265bd5 100644 --- a/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -60,6 +60,10 @@ class PassManagerBuilder { /// out of the frontend. EP_EarlyAsPossible, + /// EP_ModuleOptimizerEarly - This extension point allows adding passes + /// just before the main module-level optimization passes. + EP_ModuleOptimizerEarly, + /// EP_LoopOptimizerEnd - This extension point allows adding loop passes to /// the end of the loop optimizer. EP_LoopOptimizerEnd, @@ -67,7 +71,16 @@ class PassManagerBuilder { /// EP_ScalarOptimizerLate - This extension point allows adding optimization /// passes after most of the main optimizations, but before the last /// cleanup-ish optimizations. - EP_ScalarOptimizerLate + EP_ScalarOptimizerLate, + + /// EP_OptimizerLast -- This extension point allows adding passes that + /// run after everything else. + EP_OptimizerLast, + + /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that + /// should not be disabled by O0 optimization level. The passes will be + /// inserted after the inlining pass. + EP_EnabledOnOptLevel0 }; /// The Optimization Level - Specify the basic optimization level. @@ -90,6 +103,7 @@ class PassManagerBuilder { bool DisableSimplifyLibCalls; bool DisableUnitAtATime; bool DisableUnrollLoops; + bool Vectorize; private: /// ExtensionList - This is list of all of the extensions that are registered. @@ -117,8 +131,9 @@ class PassManagerBuilder { /// populateModulePassManager - This sets up the primary pass manager. void populateModulePassManager(PassManagerBase &MPM); void populateLTOPassManager(PassManagerBase &PM, bool Internalize, - bool RunInliner); + bool RunInliner, bool DisableGVNLoadPRE = false); }; + /// Registers a function for adding a standard set of passes. This should be /// used by optimizer plugins to allow all front ends to transparently use /// them. 
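A sketch of hooking a pass into one of the new extension points, in the way the comment above describes for optimizer plugins (the pass factory is hypothetical, and the callback signature of taking the builder plus a PassManagerBase is assumed):

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

llvm::Pass *createMyCleanupPass();   // hypothetical pass, defined elsewhere

static void addMyCleanupPass(const llvm::PassManagerBuilder &Builder,
                             llvm::PassManagerBase &PM) {
  PM.add(createMyCleanupPass());
}

// EP_OptimizerLast runs the added pass after everything else in the pipeline.
static llvm::RegisterStandardPasses
RegisterCleanup(llvm::PassManagerBuilder::EP_OptimizerLast, addMyCleanupPass);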
Create a static instance of this class in your plugin, providing a @@ -129,5 +144,6 @@ struct RegisterStandardPasses { PassManagerBuilder::addGlobalExtension(Ty, Fn); } }; + } // end namespace llvm #endif diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h index 8d552317f236..bbf3a69d246d 100644 --- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h +++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h @@ -17,6 +17,7 @@ namespace llvm { class ModulePass; +class FunctionPass; // Insert edge profiling instrumentation ModulePass *createEdgeProfilerPass(); @@ -29,7 +30,13 @@ ModulePass *createPathProfilerPass(); // Insert GCOV profiling instrumentation ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true, - bool Use402Format = false); + bool Use402Format = false, + bool UseExtraChecksum = false); + +// Insert AddressSanitizer (address sanity checking) instrumentation +ModulePass *createAddressSanitizerPass(); +// Insert ThreadSanitizer (race detection) instrumentation +FunctionPass *createThreadSanitizerPass(); } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h index b1536f906d8c..7f055d446171 100644 --- a/contrib/llvm/include/llvm/Transforms/Scalar.h +++ b/contrib/llvm/include/llvm/Transforms/Scalar.h @@ -112,6 +112,8 @@ Pass *createLICMPass(); // Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0); +Pass *createGlobalMergePass(const TargetLowering *TLI = 0); + //===----------------------------------------------------------------------===// // // LoopUnswitch - This pass is a simple loop unswitching pass. @@ -305,12 +307,6 @@ FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0); FunctionPass *createInstructionNamerPass(); extern char &InstructionNamerID; -//===----------------------------------------------------------------------===// -// -// GEPSplitter - Split complex GEPs into simple ones -// -FunctionPass *createGEPSplitterPass(); - //===----------------------------------------------------------------------===// // // Sink - Code Sinking @@ -329,6 +325,12 @@ Pass *createLowerAtomicPass(); // Pass *createCorrelatedValuePropagationPass(); +//===----------------------------------------------------------------------===// +// +// ObjCARCAPElim - ObjC ARC autorelease pool elimination. +// +Pass *createObjCARCAPElimPass(); + //===----------------------------------------------------------------------===// // // ObjCARCExpand - ObjC ARC preliminary simplifications. diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 6fcd160e64e4..867b9e43849d 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -173,9 +173,8 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P); /// complicated to handle the case where one of the edges being split /// is an exit of a loop with other exits). /// -BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P = 0); +BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, + const char *Suffix, Pass *P = 0); /// SplitLandingPadPredecessors - This method transforms the landing pad, /// OrigBB, by introducing two new basic blocks into the function. 
One of those diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicInliner.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicInliner.h deleted file mode 100644 index 4bca6b8c4417..000000000000 --- a/contrib/llvm/include/llvm/Transforms/Utils/BasicInliner.h +++ /dev/null @@ -1,55 +0,0 @@ -//===- BasicInliner.h - Basic function level inliner ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a simple function based inliner that does not use -// call graph information. -// -//===----------------------------------------------------------------------===// - -#ifndef BASICINLINER_H -#define BASICINLINER_H - -#include "llvm/Analysis/InlineCost.h" - -namespace llvm { - - class Function; - class TargetData; - struct BasicInlinerImpl; - - /// BasicInliner - BasicInliner provides function level inlining interface. - /// Clients provide list of functions which are inline without using - /// module level call graph information. Note that the BasicInliner is - /// free to delete a function if it is inlined into all call sites. - class BasicInliner { - public: - - explicit BasicInliner(TargetData *T = NULL); - ~BasicInliner(); - - /// addFunction - Add function into the list of functions to process. - /// All functions must be inserted using this interface before invoking - /// inlineFunctions(). - void addFunction(Function *F); - - /// neverInlineFunction - Sometimes a function is never to be inlined - /// because of one or other reason. - void neverInlineFunction(Function *F); - - /// inlineFuctions - Walk all call sites in all functions supplied by - /// client. Inline as many call sites as possible. Delete completely - /// inlined functions. - void inlineFunctions(); - - private: - BasicInlinerImpl *Impl; - }; -} - -#endif diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index e82593838467..17cd58eb014e 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -20,6 +20,7 @@ namespace llvm { class Value; class TargetData; + class TargetLibraryInfo; /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *CastToCStr(Value *V, IRBuilder<> &B); @@ -68,7 +69,7 @@ namespace llvm { /// 'Op' and returns one value with the same type. If 'Op' is a long double, /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f' /// suffix. - Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, + Value *EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, const AttrListPtr &Attrs); /// EmitPutChar - Emit a call to the putchar function. This assumes that Char @@ -86,12 +87,13 @@ namespace llvm { /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. - void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD); + void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. 
void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, - const TargetData *TD); + const TargetData *TD, const TargetLibraryInfo *TLI); /// SimplifyFortifiedLibCalls - Helper class for folding checked library /// calls (e.g. __strcpy_chk) into their unchecked counterparts. diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h index 674c2d002e85..b7b5d29b320f 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -56,21 +56,13 @@ struct ClonedCodeInfo { /// call instruction. bool ContainsCalls; - /// ContainsUnwinds - This is set to true if the cloned code contains an - /// unwind instruction. - bool ContainsUnwinds; - /// ContainsDynamicAllocas - This is set to true if the cloned code contains /// a 'dynamic' alloca. Dynamic allocas are allocas that are either not in /// the entry block or they are in the entry block but are not a constant /// size. bool ContainsDynamicAllocas; - ClonedCodeInfo() { - ContainsCalls = false; - ContainsUnwinds = false; - ContainsDynamicAllocas = false; - } + ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {} }; @@ -134,8 +126,8 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){ /// Clone OldFunc into NewFunc, transforming the old arguments into references /// to VMap values. Note that if NewFunc already has basic blocks, the ones /// cloned into it will be added to the end of the function. This function -/// fills in a list of return instructions, and can optionally append the -/// specified suffix to all values cloned. +/// fills in a list of return instructions, and can optionally remap types +/// and/or append the specified suffix to all values cloned. /// /// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue /// mappings. @@ -145,7 +137,8 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, bool ModuleLevelChanges, SmallVectorImpl &Returns, const char *NameSuffix = "", - ClonedCodeInfo *CodeInfo = 0); + ClonedCodeInfo *CodeInfo = 0, + ValueMapTypeRemapper *TypeMapper = 0); /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The @@ -204,9 +197,9 @@ class InlineFunctionInfo { /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. /// -bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI); -bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI); -bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI); +bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, bool InsertLifetime = true); +bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, bool InsertLifetime = true); +bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime = true); } // End llvm namespace diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h b/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h new file mode 100644 index 000000000000..7ad7bddce503 --- /dev/null +++ b/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h @@ -0,0 +1,66 @@ +//===-- CmpInstAnalysis.h - Utils to help fold compare insts ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
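One consequence of the Cloning.h change above: InlineFunction() now takes an InsertLifetime flag, mirroring the inliner's new InsertLifetime option for @llvm.lifetime intrinsics. A minimal sketch (the wrapper name is hypothetical):

#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/Cloning.h"

bool inlineWithoutLifetimeMarkers(llvm::CallSite CS,
                                  llvm::InlineFunctionInfo &IFI) {
  // Same as the old two-argument call, except the inliner is told not to
  // insert @llvm.lifetime intrinsics for the inlined allocas.
  return llvm::InlineFunction(CS, IFI, /*InsertLifetime=*/false);
}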
+// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_CMPINSTANALYSIS_H +#define LLVM_TRANSFORMS_UTILS_CMPINSTANALYSIS_H + +#include "llvm/InstrTypes.h" + +namespace llvm { + class ICmpInst; + class Value; + + /// getICmpCode - Encode a icmp predicate into a three bit mask. These bits + /// are carefully arranged to allow folding of expressions such as: + /// + /// (A < B) | (A > B) --> (A != B) + /// + /// Note that this is only valid if the first and second predicates have the + /// same sign. Is illegal to do: (A u< B) | (A s> B) + /// + /// Three bits are used to represent the condition, as follows: + /// 0 A > B + /// 1 A == B + /// 2 A < B + /// + /// <=> Value Definition + /// 000 0 Always false + /// 001 1 A > B + /// 010 2 A == B + /// 011 3 A >= B + /// 100 4 A < B + /// 101 5 A != B + /// 110 6 A <= B + /// 111 7 Always true + /// + unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false); + + /// getICmpValue - This is the complement of getICmpCode, which turns an + /// opcode and two operands into either a constant true or false, or the + /// predicate for a new ICmp instruction. The sign is passed in to determine + /// which kind of predicate to use in the new icmp instruction. + /// Non-NULL return value will be a true or false constant. + /// NULL return means a new ICmp is needed. The predicate for which is + /// output in NewICmpPred. + Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + CmpInst::Predicate &NewICmpPred); + + /// PredicatesFoldable - Return true if both predicates match sign or if at + /// least one of them is an equality comparison (which is signless). + bool PredicatesFoldable(CmpInst::Predicate p1, CmpInst::Predicate p2); + +} // end namespace llvm + +#endif + diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h new file mode 100644 index 000000000000..2c0ec9b118cf --- /dev/null +++ b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -0,0 +1,33 @@ +//===-- ModuleUtils.h - Functions to manipulate Modules ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H +#define LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H + +namespace llvm { + +class Module; +class Function; + +/// Append F to the list of global ctors of module M with the given Priority. +/// This wraps the function in the appropriate structure and stores it along +/// side other global constructors. For details see +/// http://llvm.org/docs/LangRef.html#intg_global_ctors +void appendToGlobalCtors(Module &M, Function *F, int Priority); + +/// Same as appendToGlobalCtors(), but for global dtors. 
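A worked example of the encoding table above: with matching signs, (A < B) has code 4 and (A > B) has code 1, so OR-ing them gives 5, which decodes to A != B. A sketch built only from the declarations in this new header (the folding helper itself is hypothetical):

#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/CmpInstAnalysis.h"

// Folds (LHS pred1 RHS) | (LHS pred2 RHS) by OR-ing the three-bit codes.
llvm::Value *foldOrOfICmps(llvm::ICmpInst *ICI1, llvm::ICmpInst *ICI2,
                           llvm::Value *LHS, llvm::Value *RHS, bool Sign) {
  if (!llvm::PredicatesFoldable(ICI1->getPredicate(), ICI2->getPredicate()))
    return 0;   // mixing signed and unsigned orderings is not legal here
  unsigned Code = llvm::getICmpCode(ICI1) | llvm::getICmpCode(ICI2);
  llvm::CmpInst::Predicate NewPred;
  // A non-NULL result is a constant true/false; NULL means build a new icmp
  // with NewPred (e.g. code 5 yields ICMP_NE).
  return llvm::getICmpValue(Sign, Code, LHS, RHS, NewPred);
}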
+void appendToGlobalDtors(Module &M, Function *F, int Priority); + +} // End llvm namespace + +#endif // LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 064e5501a455..4c821491b210 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -14,16 +14,18 @@ #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H +#include "llvm/ADT/StringRef.h" + namespace llvm { - class Value; class BasicBlock; - class Use; - class PHINode; + class Instruction; + class LoadInst; template class SmallVectorImpl; template class SSAUpdaterTraits; - class DbgDeclareInst; - class DIBuilder; - class BumpPtrAllocator; + class PHINode; + class Type; + class Use; + class Value; /// SSAUpdater - This class updates SSA form for a set of values defined in /// multiple blocks. This is used when code duplication or another unstructured @@ -137,12 +139,7 @@ class LoadAndStorePromoter { /// passed into the run method). Clients should implement this with a more /// efficient version if possible. virtual bool isInstInList(Instruction *I, - const SmallVectorImpl &Insts) const { - for (unsigned i = 0, e = Insts.size(); i != e; ++i) - if (Insts[i] == I) - return true; - return false; - } + const SmallVectorImpl &Insts) const; /// doExtraRewritesBeforeFinalDeletion - This hook is invoked after all the /// stores are found and inserted as available values, but diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index 5a03d224ff7c..a9adbd73c152 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -15,8 +15,16 @@ #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H #define LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" + namespace llvm { +class CastInst; +class PHINode; template class SSAUpdaterTraits; template @@ -372,7 +380,7 @@ class SSAUpdaterImpl { if (!SomePHI) break; if (CheckIfPHIMatches(SomePHI)) { - RecordMatchingPHI(SomePHI); + RecordMatchingPHIs(BlockList); break; } // Match failed: clear all the PHITag values. @@ -429,38 +437,17 @@ class SSAUpdaterImpl { return true; } - /// RecordMatchingPHI - For a PHI node that matches, record it and its input - /// PHIs in both the BBMap and the AvailableVals mapping. - void RecordMatchingPHI(PhiT *PHI) { - SmallVector WorkList; - WorkList.push_back(PHI); - - // Record this PHI. - BlkT *BB = PHI->getParent(); - ValT PHIVal = Traits::GetPHIValue(PHI); - (*AvailableVals)[BB] = PHIVal; - BBMap[BB]->AvailableVal = PHIVal; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI), - E = Traits::PHI_end(PHI); I != E; ++I) { - ValT IncomingVal = I.getIncomingValue(); - PhiT *IncomingPHI = Traits::ValueIsPHI(IncomingVal, Updater); - if (!IncomingPHI) continue; - BB = IncomingPHI->getParent(); - BBInfo *Info = BBMap[BB]; - if (!Info || Info->AvailableVal) - continue; - - // Record the PHI and add it to the worklist. 
- (*AvailableVals)[BB] = IncomingVal; - Info->AvailableVal = IncomingVal; - WorkList.push_back(IncomingPHI); + /// RecordMatchingPHIs - For each PHI node that matches, record it in both + /// the BBMap and the AvailableVals mapping. + void RecordMatchingPHIs(BlockListTy *BlockList) { + for (typename BlockListTy::iterator I = BlockList->begin(), + E = BlockList->end(); I != E; ++I) + if (PhiT *PHI = (*I)->PHITag) { + BlkT *BB = PHI->getParent(); + ValT PHIVal = Traits::GetPHIValue(PHI); + (*AvailableVals)[BB] = PHIVal; + BBMap[BB]->AvailableVal = PHIVal; } - } } }; diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h index 524cf5ad9793..2632d186ff9b 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -17,21 +17,23 @@ #define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ValueHandle.h" namespace llvm { extern cl::opt DisableIVRewrite; -class Loop; -class LoopInfo; -class DominatorTree; -class ScalarEvolution; -class LPPassManager; +class CastInst; class IVUsers; +class Loop; +class LPPassManager; +class PHINode; +class ScalarEvolution; /// Interface for visiting interesting IV users that are recognized but not /// simplified by this utility. class IVVisitor { + virtual void anchor(); public: virtual ~IVVisitor() {} virtual void visitCast(CastInst *Cast) = 0; @@ -47,12 +49,6 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, SmallVectorImpl &Dead); -/// simplifyIVUsers - Simplify instructions recorded by the IVUsers pass. -/// This is a legacy implementation to reproduce the behavior of the -/// IndVarSimplify pass prior to DisableIVRewrite. -bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead); - } // namespace llvm #endif diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 7212a8c76069..f175e8371e79 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -22,9 +22,12 @@ class Loop; class LoopInfo; class LPPassManager; -bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, +bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, LoopInfo* LI, LPPassManager* LPM); +bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, + LPPassManager* LPM); + } #endif diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h index 03846567dbcc..8594707a8482 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/ValueMapper.h @@ -20,7 +20,7 @@ namespace llvm { class Value; class Instruction; - typedef ValueMap > ValueToValueMapTy; + typedef ValueMap ValueToValueMapTy; /// ValueMapTypeRemapper - This is a class that can be implemented by clients /// to remap types when cloning constants and instructions. 
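The UnrollLoop() declaration shown above gains an AllowRuntime flag alongside the new UnrollRuntimeLoopProlog() helper; a minimal sketch of opting in (the wrapper name is hypothetical, and the other parameters are simply passed through):

#include "llvm/Transforms/Utils/UnrollLoop.h"

bool unrollWithRuntimeTripCount(llvm::Loop *L, unsigned Count,
                                unsigned TripCount, unsigned TripMultiple,
                                llvm::LoopInfo *LI, llvm::LPPassManager *LPM) {
  // AllowRuntime lets the unroller emit the prologue that
  // UnrollRuntimeLoopProlog() builds when the trip count is not known
  // statically.
  return llvm::UnrollLoop(L, Count, TripCount, /*AllowRuntime=*/true,
                          TripMultiple, LI, LPM);
}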
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize.h new file mode 100644 index 000000000000..7701ceb4d0b3 --- /dev/null +++ b/contrib/llvm/include/llvm/Transforms/Vectorize.h @@ -0,0 +1,106 @@ +//===-- Vectorize.h - Vectorization Transformations -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines prototypes for accessor functions that expose passes +// in the Vectorize transformations library. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_H +#define LLVM_TRANSFORMS_VECTORIZE_H + +namespace llvm { +class BasicBlock; +class BasicBlockPass; + +//===----------------------------------------------------------------------===// +/// @brief Vectorize configuration. +struct VectorizeConfig { + //===--------------------------------------------------------------------===// + // Target architecture related parameters + + /// @brief The size of the native vector registers. + unsigned VectorBits; + + /// @brief Vectorize integer values. + bool VectorizeInts; + + /// @brief Vectorize floating-point values. + bool VectorizeFloats; + + /// @brief Vectorize casting (conversion) operations. + bool VectorizeCasts; + + /// @brief Vectorize floating-point math intrinsics. + bool VectorizeMath; + + /// @brief Vectorize the fused-multiply-add intrinsic. + bool VectorizeFMA; + + /// @brief Vectorize loads and stores. + bool VectorizeMemOps; + + /// @brief Only generate aligned loads and stores. + bool AlignedOnly; + + //===--------------------------------------------------------------------===// + // Misc parameters + + /// @brief The required chain depth for vectorization. + unsigned ReqChainDepth; + + /// @brief The maximum search distance for instruction pairs. + unsigned SearchLimit; + + /// @brief The maximum number of candidate pairs with which to use a full + /// cycle check. + unsigned MaxCandPairsForCycleCheck; + + /// @brief Replicating one element to a pair breaks the chain. + bool SplatBreaksChain; + + /// @brief The maximum number of pairable instructions per group. + unsigned MaxInsts; + + /// @brief The maximum number of pairing iterations. + unsigned MaxIter; + + /// @brief Don't boost the chain-depth contribution of loads and stores. + bool NoMemOpBoost; + + /// @brief Use a fast instruction dependency analysis. + bool FastDep; + + /// @brief Initialize the VectorizeConfig from command line options. + VectorizeConfig(); +}; + +//===----------------------------------------------------------------------===// +// +// BBVectorize - A basic-block vectorization pass. +// +BasicBlockPass * +createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig()); + +//===----------------------------------------------------------------------===// +/// @brief Vectorize the BasicBlock. +/// +/// @param BB The BasicBlock to be vectorized +/// @param P The current running pass, should require AliasAnalysis and +/// ScalarEvolution. After the vectorization, AliasAnalysis, +/// ScalarEvolution and CFG are preserved. +/// +/// @return True if the BB is changed, false otherwise. 
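A sketch of driving the basic-block vectorizer declared in this new header, either by building the pass as below or via the standalone vectorizeBasicBlock() entry point that follows (the chosen config values are placeholders):

#include "llvm/Transforms/Vectorize.h"

llvm::BasicBlockPass *makeBBVectorizer() {
  llvm::VectorizeConfig C;   // defaults come from the command-line options
  C.VectorBits = 128;        // assumed native vector register width, in bits
  C.AlignedOnly = true;      // only generate aligned loads and stores
  return llvm::createBBVectorizePass(C);
}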
+/// +bool vectorizeBasicBlock(Pass *P, BasicBlock &BB, + const VectorizeConfig &C = VectorizeConfig()); + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/include/llvm/Type.h b/contrib/llvm/include/llvm/Type.h index 43b7dc578886..185258d8ff2a 100644 --- a/contrib/llvm/include/llvm/Type.h +++ b/contrib/llvm/include/llvm/Type.h @@ -16,6 +16,7 @@ #define LLVM_TYPE_H #include "llvm/Support/Casting.h" +#include "llvm/Support/DataTypes.h" namespace llvm { @@ -25,6 +26,7 @@ class raw_ostream; class Module; class LLVMContext; class LLVMContextImpl; +class StringRef; template struct GraphTraits; /// The instances of the Type class are immutable: once they are created, @@ -47,23 +49,24 @@ class Type { enum TypeID { // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date. VoidTyID = 0, ///< 0: type with no size - FloatTyID, ///< 1: 32-bit floating point type - DoubleTyID, ///< 2: 64-bit floating point type - X86_FP80TyID, ///< 3: 80-bit floating point type (X87) - FP128TyID, ///< 4: 128-bit floating point type (112-bit mantissa) - PPC_FP128TyID, ///< 5: 128-bit floating point type (two 64-bits, PowerPC) - LabelTyID, ///< 6: Labels - MetadataTyID, ///< 7: Metadata - X86_MMXTyID, ///< 8: MMX vectors (64 bits, X86 specific) + HalfTyID, ///< 1: 16-bit floating point type + FloatTyID, ///< 2: 32-bit floating point type + DoubleTyID, ///< 3: 64-bit floating point type + X86_FP80TyID, ///< 4: 80-bit floating point type (X87) + FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa) + PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC) + LabelTyID, ///< 7: Labels + MetadataTyID, ///< 8: Metadata + X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific) // Derived types... see DerivedTypes.h file. // Make sure FirstDerivedTyID stays up to date! - IntegerTyID, ///< 9: Arbitrary bit width integers - FunctionTyID, ///< 10: Functions - StructTyID, ///< 11: Structures - ArrayTyID, ///< 12: Arrays - PointerTyID, ///< 13: Pointers - VectorTyID, ///< 14: SIMD 'packed' format, or other vector type + IntegerTyID, ///< 10: Arbitrary bit width integers + FunctionTyID, ///< 11: Functions + StructTyID, ///< 12: Structures + ArrayTyID, ///< 13: Arrays + PointerTyID, ///< 14: Pointers + VectorTyID, ///< 15: SIMD 'packed' format, or other vector type NumTypeIDs, // Must remain as last defined ID LastPrimitiveTyID = X86_MMXTyID, @@ -74,21 +77,32 @@ class Type { /// Context - This refers to the LLVMContext in which this type was uniqued. LLVMContext &Context; - TypeID ID : 8; // The current base type of this type. - unsigned SubclassData : 24; // Space for subclasses to store data + // Due to Ubuntu GCC bug 910363: + // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363 + // Bitpack ID and SubclassData manually. + // Note: TypeID : low 8 bit; SubclassData : high 24 bit. 
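A short worked example of the manual bit-packing above, assuming the same layout (TypeID in the low 8 bits, subclass data in the high 24): for a vector type (ID 15) carrying subclass data 4, the stored word is (4 << 8) | 15 = 0x40F, and masking/shifting recovers both fields.

#include <assert.h>
#include "llvm/Support/DataTypes.h"

void checkTypeIDPacking() {
  uint32_t Word = (4u << 8) | 15u;   // SubclassData = 4 (placeholder), VectorTyID = 15
  assert((Word & 0xFF) == 15u);      // what getTypeID() extracts
  assert((Word >> 8) == 4u);         // what getSubclassData() extracts
}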
+ uint32_t IDAndSubclassData; protected: friend class LLVMContextImpl; explicit Type(LLVMContext &C, TypeID tid) - : Context(C), ID(tid), SubclassData(0), - NumContainedTys(0), ContainedTys(0) {} + : Context(C), IDAndSubclassData(0), + NumContainedTys(0), ContainedTys(0) { + setTypeID(tid); + } ~Type() {} - - unsigned getSubclassData() const { return SubclassData; } + + void setTypeID(TypeID ID) { + IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00); + assert(getTypeID() == ID && "TypeID data too large for field"); + } + + unsigned getSubclassData() const { return IDAndSubclassData >> 8; } + void setSubclassData(unsigned val) { - SubclassData = val; + IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8); // Ensure we don't have any accidental truncation. - assert(SubclassData == val && "Subclass data too large for field"); + assert(getSubclassData() == val && "Subclass data too large for field"); } /// NumContainedTys - Keeps track of how many Type*'s there are in the @@ -116,49 +130,54 @@ class Type { /// getTypeID - Return the type id for the type. This will return one /// of the TypeID enum elements defined above. /// - TypeID getTypeID() const { return ID; } + TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); } /// isVoidTy - Return true if this is 'void'. - bool isVoidTy() const { return ID == VoidTyID; } + bool isVoidTy() const { return getTypeID() == VoidTyID; } + + /// isHalfTy - Return true if this is 'half', a 16-bit IEEE fp type. + bool isHalfTy() const { return getTypeID() == HalfTyID; } /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. - bool isFloatTy() const { return ID == FloatTyID; } + bool isFloatTy() const { return getTypeID() == FloatTyID; } /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. - bool isDoubleTy() const { return ID == DoubleTyID; } + bool isDoubleTy() const { return getTypeID() == DoubleTyID; } /// isX86_FP80Ty - Return true if this is x86 long double. - bool isX86_FP80Ty() const { return ID == X86_FP80TyID; } + bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; } /// isFP128Ty - Return true if this is 'fp128'. - bool isFP128Ty() const { return ID == FP128TyID; } + bool isFP128Ty() const { return getTypeID() == FP128TyID; } /// isPPC_FP128Ty - Return true if this is powerpc long double. - bool isPPC_FP128Ty() const { return ID == PPC_FP128TyID; } + bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; } /// isFloatingPointTy - Return true if this is one of the five floating point /// types bool isFloatingPointTy() const { - return ID == FloatTyID || ID == DoubleTyID || - ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; + return getTypeID() == HalfTyID || getTypeID() == FloatTyID || + getTypeID() == DoubleTyID || + getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID || + getTypeID() == PPC_FP128TyID; } /// isX86_MMXTy - Return true if this is X86 MMX. - bool isX86_MMXTy() const { return ID == X86_MMXTyID; } + bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; } /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP. /// bool isFPOrFPVectorTy() const; /// isLabelTy - Return true if this is 'label'. - bool isLabelTy() const { return ID == LabelTyID; } + bool isLabelTy() const { return getTypeID() == LabelTyID; } /// isMetadataTy - Return true if this is 'metadata'. 
- bool isMetadataTy() const { return ID == MetadataTyID; } + bool isMetadataTy() const { return getTypeID() == MetadataTyID; } /// isIntegerTy - True if this is an instance of IntegerType. /// - bool isIntegerTy() const { return ID == IntegerTyID; } + bool isIntegerTy() const { return getTypeID() == IntegerTyID; } /// isIntegerTy - Return true if this is an IntegerType of the given width. bool isIntegerTy(unsigned Bitwidth) const; @@ -170,23 +189,23 @@ class Type { /// isFunctionTy - True if this is an instance of FunctionType. /// - bool isFunctionTy() const { return ID == FunctionTyID; } + bool isFunctionTy() const { return getTypeID() == FunctionTyID; } /// isStructTy - True if this is an instance of StructType. /// - bool isStructTy() const { return ID == StructTyID; } + bool isStructTy() const { return getTypeID() == StructTyID; } /// isArrayTy - True if this is an instance of ArrayType. /// - bool isArrayTy() const { return ID == ArrayTyID; } + bool isArrayTy() const { return getTypeID() == ArrayTyID; } /// isPointerTy - True if this is an instance of PointerType. /// - bool isPointerTy() const { return ID == PointerTyID; } + bool isPointerTy() const { return getTypeID() == PointerTyID; } /// isVectorTy - True if this is an instance of VectorType. /// - bool isVectorTy() const { return ID == VectorTyID; } + bool isVectorTy() const { return getTypeID() == VectorTyID; } /// canLosslesslyBitCastTo - Return true if this type could be converted /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts @@ -202,14 +221,14 @@ class Type { /// Here are some useful little methods to query what type derived types are /// Note that all other types can just compare to see if this == Type::xxxTy; /// - bool isPrimitiveType() const { return ID <= LastPrimitiveTyID; } - bool isDerivedType() const { return ID >= FirstDerivedTyID; } + bool isPrimitiveType() const { return getTypeID() <= LastPrimitiveTyID; } + bool isDerivedType() const { return getTypeID() >= FirstDerivedTyID; } /// isFirstClassType - Return true if the type is "first class", meaning it /// is a valid type for a Value. /// bool isFirstClassType() const { - return ID != FunctionTyID && ID != VoidTyID; + return getTypeID() != FunctionTyID && getTypeID() != VoidTyID; } /// isSingleValueType - Return true if the type is a valid type for a @@ -217,8 +236,9 @@ class Type { /// and array types. /// bool isSingleValueType() const { - return (ID != VoidTyID && isPrimitiveType()) || - ID == IntegerTyID || ID == PointerTyID || ID == VectorTyID; + return (getTypeID() != VoidTyID && isPrimitiveType()) || + getTypeID() == IntegerTyID || getTypeID() == PointerTyID || + getTypeID() == VectorTyID; } /// isAggregateType - Return true if the type is an aggregate type. This @@ -227,7 +247,7 @@ class Type { /// does not include vector types. /// bool isAggregateType() const { - return ID == StructTyID || ID == ArrayTyID; + return getTypeID() == StructTyID || getTypeID() == ArrayTyID; } /// isSized - Return true if it makes sense to take the size of this type. To @@ -236,12 +256,14 @@ class Type { /// bool isSized() const { // If it's a primitive, it is always sized. - if (ID == IntegerTyID || isFloatingPointTy() || ID == PointerTyID || - ID == X86_MMXTyID) + if (getTypeID() == IntegerTyID || isFloatingPointTy() || + getTypeID() == PointerTyID || + getTypeID() == X86_MMXTyID) return true; // If it is not something that can have a size (e.g. a function or label), // it doesn't have a size. 
- if (ID != StructTyID && ID != ArrayTyID && ID != VectorTyID) + if (getTypeID() != StructTyID && getTypeID() != ArrayTyID && + getTypeID() != VectorTyID) return false; // Otherwise we have to try harder to decide. return isSizedDerivedType(); @@ -293,6 +315,34 @@ class Type { /// unsigned getNumContainedTypes() const { return NumContainedTys; } + //===--------------------------------------------------------------------===// + // Helper methods corresponding to subclass methods. This forces a cast to + // the specified subclass and calls its accessor. "getVectorNumElements" (for + // example) is shorthand for cast(Ty)->getNumElements(). This is + // only intended to cover the core methods that are frequently used, helper + // methods should not be added here. + + unsigned getIntegerBitWidth() const; + + Type *getFunctionParamType(unsigned i) const; + unsigned getFunctionNumParams() const; + bool isFunctionVarArg() const; + + StringRef getStructName() const; + unsigned getStructNumElements() const; + Type *getStructElementType(unsigned N) const; + + Type *getSequentialElementType() const; + + uint64_t getArrayNumElements() const; + Type *getArrayElementType() const { return getSequentialElementType(); } + + unsigned getVectorNumElements() const; + Type *getVectorElementType() const { return getSequentialElementType(); } + + unsigned getPointerAddressSpace() const; + Type *getPointerElementType() const { return getSequentialElementType(); } + //===--------------------------------------------------------------------===// // Static members exported by the Type class itself. Useful for getting // instances of Type. @@ -306,6 +356,7 @@ class Type { // static Type *getVoidTy(LLVMContext &C); static Type *getLabelTy(LLVMContext &C); + static Type *getHalfTy(LLVMContext &C); static Type *getFloatTy(LLVMContext &C); static Type *getDoubleTy(LLVMContext &C); static Type *getMetadataTy(LLVMContext &C); @@ -324,6 +375,7 @@ class Type { // Convenience methods for getting pointer types with one of the above builtin // types as pointee. // + static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0); diff --git a/contrib/llvm/include/llvm/User.h b/contrib/llvm/include/llvm/User.h index 62bc9f034618..c52f32f425c4 100644 --- a/contrib/llvm/include/llvm/User.h +++ b/contrib/llvm/include/llvm/User.h @@ -19,6 +19,7 @@ #ifndef LLVM_USER_H #define LLVM_USER_H +#include "llvm/Support/ErrorHandling.h" #include "llvm/Value.h" namespace llvm { @@ -34,6 +35,7 @@ class User : public Value { void *operator new(size_t); // Do not implement template friend struct HungoffOperandTraits; + virtual void anchor(); protected: /// OperandList - This is a pointer to the array of Uses for this User. /// For nodes of fixed arity (e.g. a binary operator) this array will live @@ -64,11 +66,11 @@ class User : public Value { void operator delete(void *Usr); /// placement delete - required by std, but never called. void operator delete(void*, unsigned) { - assert(0 && "Constructor throws?"); + llvm_unreachable("Constructor throws?"); } /// placement delete - required by std, but never called. 
void operator delete(void*, unsigned, bool) { - assert(0 && "Constructor throws?"); + llvm_unreachable("Constructor throws?"); } protected: template static Use &OpFrom(const U *that) { diff --git a/contrib/llvm/include/llvm/Value.h b/contrib/llvm/include/llvm/Value.h index a71e2fdefd72..a82ac45c49ed 100644 --- a/contrib/llvm/include/llvm/Value.h +++ b/contrib/llvm/include/llvm/Value.h @@ -15,9 +15,7 @@ #define LLVM_VALUE_H #include "llvm/Use.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" -#include namespace llvm { @@ -32,8 +30,6 @@ class GlobalAlias; class InlineAsm; class ValueSymbolTable; template class StringMapEntry; -template -class AssertingVH; typedef StringMapEntry ValueName; class raw_ostream; class AssemblyAnnotationWriter; @@ -42,6 +38,7 @@ class LLVMContext; class Twine; class MDNode; class Type; +class StringRef; //===----------------------------------------------------------------------===// // Value Class @@ -110,26 +107,16 @@ class Value { /// All values hold a context through their type. LLVMContext &getContext() const; - // All values can potentially be named... - bool hasName() const { return Name != 0; } + // All values can potentially be named. + bool hasName() const { return Name != 0 && SubclassID != MDStringVal; } ValueName *getValueName() const { return Name; } + void setValueName(ValueName *VN) { Name = VN; } /// getName() - Return a constant reference to the value's name. This is cheap /// and guaranteed to return the same reference as long as the value is not /// modified. - /// - /// This is currently guaranteed to return a StringRef for which data() points - /// to a valid null terminated string. The use of StringRef.data() is - /// deprecated here, however, and clients should not rely on it. If such - /// behavior is needed, clients should use expensive getNameStr(), or switch - /// to an interface that does not depend on null termination. StringRef getName() const; - /// getNameStr() - Return the name of the specified value, *constructing a - /// string* to hold it. This is guaranteed to construct a string and is very - /// expensive, clients should use getName() unless necessary. - std::string getNameStr() const; - /// setName() - Change the name of the value, choosing a new unique name if /// the provided name is taken. /// @@ -205,6 +192,8 @@ class Value { BlockAddressVal, // This is an instance of BlockAddress ConstantExprVal, // This is an instance of ConstantExpr ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero + ConstantDataArrayVal, // This is an instance of ConstantDataArray + ConstantDataVectorVal, // This is an instance of ConstantDataVector ConstantIntVal, // This is an instance of ConstantInt ConstantFPVal, // This is an instance of ConstantFP ConstantArrayVal, // This is an instance of ConstantArray @@ -273,14 +262,32 @@ class Value { return true; // Values are always values. } - /// stripPointerCasts - This method strips off any unneeded pointer - /// casts from the specified value, returning the original uncasted value. - /// Note that the returned value has pointer type if the specified value does. + /// stripPointerCasts - This method strips off any unneeded pointer casts and + /// all-zero GEPs from the specified value, returning the original uncasted + /// value. If this is called on a non-pointer value, it returns 'this'. 
Value *stripPointerCasts(); const Value *stripPointerCasts() const { return const_cast(this)->stripPointerCasts(); } + /// stripInBoundsConstantOffsets - This method strips off unneeded pointer casts and + /// all-constant GEPs from the specified value, returning the original + /// pointer value. If this is called on a non-pointer value, it returns + /// 'this'. + Value *stripInBoundsConstantOffsets(); + const Value *stripInBoundsConstantOffsets() const { + return const_cast(this)->stripInBoundsConstantOffsets(); + } + + /// stripInBoundsOffsets - This method strips off unneeded pointer casts and + /// any in-bounds Offsets from the specified value, returning the original + /// pointer value. If this is called on a non-pointer value, it returns + /// 'this'. + Value *stripInBoundsOffsets(); + const Value *stripInBoundsOffsets() const { + return const_cast(this)->stripInBoundsOffsets(); + } + /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. bool isDereferenceablePointer() const; diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index bd132c05c327..95c834b451c9 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -440,3 +440,19 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. 
+ if (const GlobalValue *GV = dyn_cast(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp index d947220e078d..9f219f563739 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp @@ -127,9 +127,8 @@ AliasAnalysis::AliasResult AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { AliasResult R = getAnalysis().alias(LocA, LocB); - const char *AliasString; + const char *AliasString = 0; switch (R) { - default: llvm_unreachable("Unknown alias type!"); case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; case PartialAlias: Partial++; AliasString = "Partial alias"; break; @@ -154,9 +153,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { ModRefResult R = getAnalysis().getModRefInfo(CS, Loc); - const char *MRString; + const char *MRString = 0; switch (R) { - default: llvm_unreachable("Unknown mod/ref type!"); case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; case Mod: JustMod++; MRString = "JustMod"; break; diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 37271b94a201..ac72983a8d7b 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -193,8 +193,6 @@ bool AAEval::runOnFunction(Function &F) { case AliasAnalysis::MustAlias: PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; - default: - errs() << "Unknown alias query result!\n"; } } } @@ -223,8 +221,6 @@ bool AAEval::runOnFunction(Function &F) { case AliasAnalysis::ModRef: PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); ++ModRef; break; - default: - errs() << "Unknown alias query result!\n"; } } } diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 3fcd3b55de57..f80e2fba8010 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -189,7 +189,9 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) != + if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(), + I.getSize(), + I.getTBAAInfo())) != AliasAnalysis::NoModRef) return true; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index af400ba7e70e..20ecfd26a986 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -42,22 +42,6 @@ using namespace llvm; // Useful predicates //===----------------------------------------------------------------------===// -/// isKnownNonNull - Return true if we know that the specified value is never -/// null. -static bool isKnownNonNull(const Value *V) { - // Alloca never returns null, malloc might. - if (isa(V)) return true; - - // A byval argument is never null. - if (const Argument *A = dyn_cast(V)) - return A->hasByValAttr(); - - // Global values are not null unless extern weak. 
- if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} - /// isNonEscapingLocalObject - Return true if the pointer is to a function-local /// object that never escapes from the function. static bool isNonEscapingLocalObject(const Value *V) { @@ -100,42 +84,59 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. -static uint64_t getObjectSize(const Value *V, const TargetData &TD) { +static uint64_t getObjectSize(const Value *V, const TargetData &TD, + bool RoundToAlign = false) { Type *AccessTy; + unsigned Align; if (const GlobalVariable *GV = dyn_cast(V)) { if (!GV->hasDefinitiveInitializer()) return AliasAnalysis::UnknownSize; AccessTy = GV->getType()->getElementType(); + Align = GV->getAlignment(); } else if (const AllocaInst *AI = dyn_cast(V)) { if (!AI->isArrayAllocation()) AccessTy = AI->getType()->getElementType(); else return AliasAnalysis::UnknownSize; + Align = AI->getAlignment(); } else if (const CallInst* CI = extractMallocCall(V)) { - if (!isArrayMalloc(V, &TD)) + if (!RoundToAlign && !isArrayMalloc(V, &TD)) // The size is the argument to the malloc call. if (const ConstantInt* C = dyn_cast(CI->getArgOperand(0))) return C->getZExtValue(); return AliasAnalysis::UnknownSize; } else if (const Argument *A = dyn_cast(V)) { - if (A->hasByValAttr()) + if (A->hasByValAttr()) { AccessTy = cast(A->getType())->getElementType(); - else + Align = A->getParamAlignment(); + } else { return AliasAnalysis::UnknownSize; + } } else { return AliasAnalysis::UnknownSize; } - - if (AccessTy->isSized()) - return TD.getTypeAllocSize(AccessTy); - return AliasAnalysis::UnknownSize; + + if (!AccessTy->isSized()) + return AliasAnalysis::UnknownSize; + + uint64_t Size = TD.getTypeAllocSize(AccessTy); + // If there is an explicitly specified alignment, and we need to + // take alignment into account, round up the size. (If the alignment + // is implicit, getTypeAllocSize is sufficient.) + if (RoundToAlign && Align) + Size = RoundUpToAlignment(Size, Align); + + return Size; } /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, const TargetData &TD) { - uint64_t ObjectSize = getObjectSize(V, TD); + // This function needs to use the aligned object size because we allow + // reads a bit past the end given sufficient alignment. + uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; } @@ -706,8 +707,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // pointer were passed to arguments that were neither of these, then it // couldn't be no-capture. if (!(*CI)->getType()->isPointerTy() || - (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && - !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) continue; // If this is a no-capture pointer argument, see if we can tell that it @@ -978,10 +978,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the // practical effect of this is protecting TBAA in the case of dynamic - // indices into arrays of unions. An alternative way to solve this would - // be to have clang emit extra metadata for unions and/or union accesses. 
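(Editorial aside, not part of the patch.) The getObjectSize() change earlier in this hunk adds a RoundToAlign mode: when an object has an explicit alignment, its size is rounded up to that alignment, since a sufficiently aligned access may validly read slightly past the declared end. A small sketch of that rounding; RoundUpToAlignment here is a local stand-in for the LLVM helper the patch calls:

#include <cassert>
#include <cstdint>

// Round Size up to the next multiple of Align (Align must be non-zero).
static uint64_t RoundUpToAlignment(uint64_t Size, uint64_t Align) {
  return ((Size + Align - 1) / Align) * Align;
}

int main() {
  // A 10-byte global with 16-byte alignment is treated as 16 bytes when
  // isObjectSmallerThan() asks whether an access runs past the object.
  assert(RoundUpToAlignment(10, 16) == 16);
  assert(RoundUpToAlignment(32, 16) == 32);
  return 0;
}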
- // A union-specific solution wouldn't handle the problem for malloc'd - // memory however. + // indices into arrays of unions or malloc'd memory. return PartialAlias; } diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index d16665fa55cf..8a660f737c9b 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -58,6 +58,6 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { /// that we should not rely on the value itself, but only on the comparison to /// the other block frequencies. We do this to avoid using of floating points. /// -BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const { +BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { return BFI->getBlockFreq(BB); } diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index bde3b76708fa..2730ce6c63bf 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -12,11 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -29,121 +32,118 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", char BranchProbabilityInfo::ID = 0; -namespace { -// Please note that BranchProbabilityAnalysis is not a FunctionPass. -// It is created by BranchProbabilityInfo (which is a FunctionPass), which -// provides a clear interface. Thanks to that, all heuristics and other -// private methods are hidden in the .cpp file. -class BranchProbabilityAnalysis { +// Weights are for internal use only. They are used by heuristics to help to +// estimate edges' probability. Example: +// +// Using "Loop Branch Heuristics" we predict weights of edges for the +// block BB2. +// ... +// | +// V +// BB1<-+ +// | | +// | | (Weight = 124) +// V | +// BB2--+ +// | +// | (Weight = 4) +// V +// BB3 +// +// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 +// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 +static const uint32_t LBH_TAKEN_WEIGHT = 124; +static const uint32_t LBH_NONTAKEN_WEIGHT = 4; - typedef std::pair Edge; +/// \brief Unreachable-terminating branch taken weight. +/// +/// This is the weight for a branch being taken to a block that terminates +/// (eventually) in unreachable. These are predicted as unlikely as possible. +static const uint32_t UR_TAKEN_WEIGHT = 1; - DenseMap *Weights; +/// \brief Unreachable-terminating branch not-taken weight. +/// +/// This is the weight for a branch not being taken toward a block that +/// terminates (eventually) in unreachable. Such a branch is essentially never +/// taken. Set the weight to an absurdly high value so that nested loops don't +/// easily subsume it. 
+static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; - BranchProbabilityInfo *BP; +static const uint32_t PH_TAKEN_WEIGHT = 20; +static const uint32_t PH_NONTAKEN_WEIGHT = 12; - LoopInfo *LI; +static const uint32_t ZH_TAKEN_WEIGHT = 20; +static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + +static const uint32_t FPH_TAKEN_WEIGHT = 20; +static const uint32_t FPH_NONTAKEN_WEIGHT = 12; + +// Standard weight value. Used when none of the heuristics set weight for +// the edge. +static const uint32_t NORMAL_WEIGHT = 16; + +// Minimum weight of an edge. Please note, that weight is NEVER 0. +static const uint32_t MIN_WEIGHT = 1; + +static uint32_t getMaxWeightFor(BasicBlock *BB) { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); +} - // Weights are for internal use only. They are used by heuristics to help to - // estimate edges' probability. Example: - // - // Using "Loop Branch Heuristics" we predict weights of edges for the - // block BB2. - // ... - // | - // V - // BB1<-+ - // | | - // | | (Weight = 124) - // V | - // BB2--+ - // | - // | (Weight = 4) - // V - // BB3 - // - // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 - // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 - - static const uint32_t LBH_TAKEN_WEIGHT = 124; - static const uint32_t LBH_NONTAKEN_WEIGHT = 4; - - static const uint32_t RH_TAKEN_WEIGHT = 24; - static const uint32_t RH_NONTAKEN_WEIGHT = 8; - - static const uint32_t PH_TAKEN_WEIGHT = 20; - static const uint32_t PH_NONTAKEN_WEIGHT = 12; - - static const uint32_t ZH_TAKEN_WEIGHT = 20; - static const uint32_t ZH_NONTAKEN_WEIGHT = 12; - - // Standard weight value. Used when none of the heuristics set weight for - // the edge. - static const uint32_t NORMAL_WEIGHT = 16; - - // Minimum weight of an edge. Please note, that weight is NEVER 0. - static const uint32_t MIN_WEIGHT = 1; - - // Return TRUE if BB leads directly to a Return Instruction. - static bool isReturningBlock(BasicBlock *BB) { - SmallPtrSet Visited; - - while (true) { - TerminatorInst *TI = BB->getTerminator(); - if (isa(TI)) - return true; - - if (TI->getNumSuccessors() > 1) - break; - - // It is unreachable block which we can consider as a return instruction. - if (TI->getNumSuccessors() == 0) - return true; - - Visited.insert(BB); - BB = TI->getSuccessor(0); - - // Stop if cycle is detected. - if (Visited.count(BB)) - return false; - } - +/// \brief Calculate edge weights for successors lead to unreachable. +/// +/// Predict that a successor which leads necessarily to an +/// unreachable-terminated block as extremely unlikely. +bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa(TI)) + PostDominatedByUnreachable.insert(BB); return false; } - uint32_t getMaxWeightFor(BasicBlock *BB) const { - return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); + SmallPtrSet UnreachableEdges; + SmallPtrSet ReachableEdges; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + if (PostDominatedByUnreachable.count(*I)) + UnreachableEdges.insert(*I); + else + ReachableEdges.insert(*I); } -public: - BranchProbabilityAnalysis(DenseMap *W, - BranchProbabilityInfo *BP, LoopInfo *LI) - : Weights(W), BP(BP), LI(LI) { - } + // If all successors are in the set of blocks post-dominated by unreachable, + // this block is too. 
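(Editorial aside, not part of the patch.) To make the weight constants above concrete: an edge's probability is its weight divided by the sum of all successor weights of the block, so the loop-branch weights 124 and 4 from the worked example yield roughly 96.9% and 3.1%. A tiny sketch of that arithmetic:

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

// Probability of successor I, in percent, given per-successor edge weights.
static unsigned edgeProbabilityPercent(const std::vector<uint32_t> &Weights,
                                       unsigned I) {
  uint64_t Sum = std::accumulate(Weights.begin(), Weights.end(), uint64_t(0));
  return static_cast<unsigned>(static_cast<uint64_t>(Weights[I]) * 100 / Sum);
}

int main() {
  // Loop Branch Heuristics: back edge weight 124, exiting edge weight 4.
  std::vector<uint32_t> W;
  W.push_back(124);
  W.push_back(4);
  assert(edgeProbabilityPercent(W, 0) == 96);  // 124/128 = 96.875%
  assert(edgeProbabilityPercent(W, 1) == 3);   //   4/128 =  3.125%
  return 0;
}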
+ if (UnreachableEdges.size() == TI->getNumSuccessors()) + PostDominatedByUnreachable.insert(BB); - // Metadata Weights - bool calcMetadataWeights(BasicBlock *BB); + // Skip probabilities if this block has a single successor or if all were + // reachable. + if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) + return false; - // Return Heuristics - bool calcReturnHeuristics(BasicBlock *BB); + uint32_t UnreachableWeight = + std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT); + for (SmallPtrSet::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, UnreachableWeight); - // Pointer Heuristics - bool calcPointerHeuristics(BasicBlock *BB); + if (ReachableEdges.empty()) + return true; + uint32_t ReachableWeight = + std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT); + for (SmallPtrSet::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ReachableWeight); - // Loop Branch Heuristics - bool calcLoopBranchHeuristics(BasicBlock *BB); - - // Zero Heurestics - bool calcZeroHeuristics(BasicBlock *BB); - - bool runOnFunction(Function &F); -}; -} // end anonymous namespace + return true; +} // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. -bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { +bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) return false; @@ -174,54 +174,14 @@ bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); return true; } -// Calculate Edge Weights using "Return Heuristics". Predict a successor which -// leads directly to Return Instruction will not be taken. -bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ - if (BB->getTerminator()->getNumSuccessors() == 1) - return false; - - SmallPtrSet ReturningEdges; - SmallPtrSet StayEdges; - - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; - if (isReturningBlock(Succ)) - ReturningEdges.insert(Succ); - else - StayEdges.insert(Succ); - } - - if (uint32_t numStayEdges = StayEdges.size()) { - uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges; - if (stayWeight < NORMAL_WEIGHT) - stayWeight = NORMAL_WEIGHT; - - for (SmallPtrSet::iterator I = StayEdges.begin(), - E = StayEdges.end(); I != E; ++I) - BP->setEdgeWeight(BB, *I, stayWeight); - } - - if (uint32_t numRetEdges = ReturningEdges.size()) { - uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges; - if (retWeight < MIN_WEIGHT) - retWeight = MIN_WEIGHT; - for (SmallPtrSet::iterator I = ReturningEdges.begin(), - E = ReturningEdges.end(); I != E; ++I) { - BP->setEdgeWeight(BB, *I, retWeight); - } - } - - return ReturningEdges.size() > 0; -} - // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. 
-bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -249,16 +209,14 @@ bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. -bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { - uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); - +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { Loop *L = LI->getLoopFor(BB); if (!L) return false; @@ -267,17 +225,13 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { SmallPtrSet ExitingEdges; SmallPtrSet InEdges; // Edges from header to the loop. - bool isHeader = BB == L->getHeader(); - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; - Loop *SuccL = LI->getLoopFor(Succ); - if (SuccL != L) - ExitingEdges.insert(Succ); - else if (Succ == L->getHeader()) - BackEdges.insert(Succ); - else if (isHeader) - InEdges.insert(Succ); + if (!L->contains(*I)) + ExitingEdges.insert(*I); + else if (L->getHeader() == *I) + BackEdges.insert(*I); + else + InEdges.insert(*I); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -288,7 +242,7 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; - BP->setEdgeWeight(BB, Back, backWeight); + setEdgeWeight(BB, Back, backWeight); } } @@ -300,27 +254,26 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; - BP->setEdgeWeight(BB, Back, inWeight); + setEdgeWeight(BB, Back, inWeight); } } - uint32_t numExitingEdges = ExitingEdges.size(); - if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { - uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; + if (uint32_t numExitingEdges = ExitingEdges.size()) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; for (SmallPtrSet::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { BasicBlock *Exiting = *EI; - BP->setEdgeWeight(BB, Exiting, exitWeight); + setEdgeWeight(BB, Exiting, exitWeight); } } return true; } -bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -375,45 +328,94 @@ bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); return true; } +bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { + BranchInst *BI = dyn_cast(BB->getTerminator()); + if (!BI || 
!BI->isConditional()) + return false; -bool BranchProbabilityAnalysis::runOnFunction(Function &F) { + Value *Cond = BI->getCondition(); + FCmpInst *FCmp = dyn_cast(Cond); + if (!FCmp) + return false; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *BB = I++; - - if (calcMetadataWeights(BB)) - continue; - - if (calcLoopBranchHeuristics(BB)) - continue; - - if (calcReturnHeuristics(BB)) - continue; - - if (calcPointerHeuristics(BB)) - continue; - - calcZeroHeuristics(BB); + bool isProb; + if (FCmp->isEquality()) { + // f1 == f2 -> Unlikely + // f1 != f2 -> Likely + isProb = !FCmp->isTrueWhenEqual(); + } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { + // !isnan -> Likely + isProb = true; + } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { + // isnan -> Unlikely + isProb = false; + } else { + return false; } - return false; + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + if (!isProb) + std::swap(Taken, NonTaken); + + setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); + + return true; } void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.setPreservesAll(); + AU.addRequired(); + AU.setPreservesAll(); } bool BranchProbabilityInfo::runOnFunction(Function &F) { - LoopInfo &LI = getAnalysis(); - BranchProbabilityAnalysis BPA(&Weights, this, &LI); - return BPA.runOnFunction(F); + LastF = &F; // Store the last function we ran on for printing. + LI = &getAnalysis(); + assert(PostDominatedByUnreachable.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (po_iterator I = po_begin(&F.getEntryBlock()), + E = po_end(&F.getEntryBlock()); + I != E; ++I) { + DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); + if (calcUnreachableHeuristics(*I)) + continue; + if (calcMetadataWeights(*I)) + continue; + if (calcLoopBranchHeuristics(*I)) + continue; + if (calcPointerHeuristics(*I)) + continue; + if (calcZeroHeuristics(*I)) + continue; + calcFloatingPointHeuristics(*I); + } + + PostDominatedByUnreachable.clear(); + return false; +} + +void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { + OS << "---- Branch Probabilities ----\n"; + // We print the probabilities from the last function the analysis ran over, + // or the function it is currently running over. + assert(LastF && "Cannot print prior to running over a function"); + for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); + BI != BE; ++BI) { + for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); + SI != SE; ++SI) { + printEdgeProbability(OS << " ", BI, *SI); + } + } } uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { @@ -434,12 +436,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { bool BranchProbabilityInfo:: isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. 
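(Editorial aside, not part of the patch.) The isEdgeHot() rewrite that follows compares the edge probability against BranchProbability(4, 5), i.e. an 80% threshold; the code it replaces spelled out the same test without floating point as Weight * 5 > Sum * 4. A sketch of that cross-multiplication trick:

#include <cassert>
#include <cstdint>

// Is Weight/Sum strictly greater than 4/5?  Cross-multiply in 64 bits so
// 32-bit weights cannot overflow.
static bool isHot(uint32_t Weight, uint32_t Sum) {
  return static_cast<uint64_t>(Weight) * 5 > static_cast<uint64_t>(Sum) * 4;
}

int main() {
  assert(isHot(124, 128));   // 96.9% is hot
  assert(!isHot(4, 128));    //  3.1% is not
  assert(!isHot(80, 100));   // exactly 80% is not "greater than" 4/5
  return 0;
}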
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { @@ -461,8 +459,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { } } - // FIXME: Use BranchProbability::compare. - if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4) + // Hot probability is at least 4/5 = 80% + if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) return MaxSucc; return 0; @@ -483,8 +481,8 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { void BranchProbabilityInfo:: setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { Weights[std::make_pair(Src, Dst)] = Weight; - DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " - << Dst->getNameStr() << " weight to " << Weight + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " + << Dst->getName() << " weight to " << Weight << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); } @@ -499,11 +497,12 @@ getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { } raw_ostream & -BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src, - BasicBlock *Dst) const { +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, + const BasicBlock *Src, + const BasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); - OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() + OS << "edge " << Src->getName() << " -> " << Dst->getName() << " probability is " << Prob << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp index 7bb063fbbbcf..76854000bd23 100644 --- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getNameStr() + ".dot"; + std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -111,7 +111,7 @@ namespace { } virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getNameStr() + ".dot"; + std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -143,7 +143,7 @@ INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", /// being a 'dot' and 'gv' program in your path. /// void Function::viewCFG() const { - ViewGraph(this, "cfg" + getNameStr()); + ViewGraph(this, "cfg" + getName()); } /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -152,7 +152,7 @@ void Function::viewCFG() const { /// his can make the graph smaller. 
/// void Function::viewCFGOnly() const { - ViewGraph(this, "cfg" + getNameStr(), true); + ViewGraph(this, "cfg" + getName(), true); } FunctionPass *llvm::createCFGPrinterPass () { diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp index b2c27d1dfc4b..dd33eeb1b376 100644 --- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -16,25 +16,35 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CallSite.h" +#include "llvm/Analysis/CaptureTracking.h" using namespace llvm; -/// As its comment mentions, PointerMayBeCaptured can be expensive. -/// However, it's not easy for BasicAA to cache the result, because -/// it's an ImmutablePass. To work around this, bound queries at a -/// fixed number of uses. -/// -/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep -/// a cache. Then we can move the code from BasicAliasAnalysis into -/// that path, and remove this threshold. -static int const Threshold = 20; +CaptureTracker::~CaptureTracker() {} + +namespace { + struct SimpleCaptureTracker : public CaptureTracker { + explicit SimpleCaptureTracker(bool ReturnCaptures) + : ReturnCaptures(ReturnCaptures), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { return true; } + + bool captured(Use *U) { + if (isa(U->getUser()) && !ReturnCaptures) + return false; + + Captured = true; + return true; + } + + bool ReturnCaptures; + + bool Captured; + }; +} /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can @@ -45,6 +55,26 @@ static int const Threshold = 20; /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { + assert(!isa(V) && + "It doesn't make sense to ask whether a global is captured."); + + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. + (void)StoreCaptures; + + SimpleCaptureTracker SCT(ReturnCaptures); + PointerMayBeCaptured(V, &SCT); + return SCT.Captured; +} + +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + +void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); SmallVector Worklist; SmallSet Visited; @@ -55,9 +85,10 @@ bool llvm::PointerMayBeCaptured(const Value *V, // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. if (Count++ >= Threshold) - return true; + return Tracker->tooManyUses(); Use *U = &UI.getUse(); + if (!Tracker->shouldExplore(U)) continue; Visited.insert(U); Worklist.push_back(U); } @@ -86,11 +117,10 @@ bool llvm::PointerMayBeCaptured(const Value *V, // (think of self-referential objects). 
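(Editorial aside, not part of the patch.) The CaptureTracker interface introduced in this hunk turns capture analysis into a callback-driven walk: tooManyUses() is the conservative bail-out, shouldExplore() filters which uses are followed, and captured() decides whether a given use counts and whether the walk can stop. SimpleCaptureTracker above recreates the old boolean query. A hedged sketch of another possible tracker, written against the signatures as they appear in this hunk; the class name and its store-only policy are invented for illustration, and it is only expected to compile inside the LLVM tree:

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Instructions.h"
#include "llvm/Use.h"

namespace {
  // Hypothetical policy: only a store of the pointer counts as a capture;
  // give up conservatively once the use list is too long.
  struct StoreOnlyCaptureTracker : public llvm::CaptureTracker {
    StoreOnlyCaptureTracker() : Captured(false) {}

    void tooManyUses() { Captured = true; }

    bool shouldExplore(llvm::Use *U) { return true; }

    bool captured(llvm::Use *U) {
      if (!llvm::isa<llvm::StoreInst>(U->getUser()))
        return false;        // not a store: keep walking
      Captured = true;
      return true;           // answer is known, stop the walk
    }

    bool Captured;
  };
}

// Intended use, mirroring SimpleCaptureTracker:
//   StoreOnlyCaptureTracker Tracker;
//   llvm::PointerMayBeCaptured(V, &Tracker);
//   if (Tracker.Captured) { ... }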
CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); for (CallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) + if (A->get() == V && !CS.doesNotCapture(A - B)) // The parameter is not marked 'nocapture' - captured. - return true; - // Only passed via 'nocapture' arguments, or is the called function - not - // captured. + if (Tracker->captured(U)) + return; break; } case Instruction::Load: @@ -99,18 +129,11 @@ bool llvm::PointerMayBeCaptured(const Value *V, case Instruction::VAArg: // "va-arg" from a pointer does not cause it to be captured. break; - case Instruction::Ret: - if (ReturnCaptures) - return true; - break; case Instruction::Store: if (V == I->getOperand(0)) // Stored the pointer - conservatively assume it may be captured. - // TODO: If StoreCaptures is not true, we could do Fancy analysis - // to determine whether this store is not actually an escape point. - // In that case, BasicAliasAnalysis should be updated as well to - // take advantage of this. - return true; + if (Tracker->captured(U)) + return; // Storing to the pointee does not cause the pointer to be captured. break; case Instruction::BitCast: @@ -122,7 +145,8 @@ bool llvm::PointerMayBeCaptured(const Value *V, UI != UE; ++UI) { Use *U = &UI.getUse(); if (Visited.insert(U)) - Worklist.push_back(U); + if (Tracker->shouldExplore(U)) + Worklist.push_back(U); } break; case Instruction::ICmp: @@ -136,13 +160,16 @@ bool llvm::PointerMayBeCaptured(const Value *V, break; // Otherwise, be conservative. There are crazy ways to capture pointers // using comparisons. - return true; + if (Tracker->captured(U)) + return; + break; default: // Something else - be conservative and say it is captured. - return true; + if (Tracker->captured(U)) + return; + break; } } - // All uses examined - not captured. - return false; + // All uses examined. } diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp new file mode 100644 index 000000000000..316e7bc9349a --- /dev/null +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -0,0 +1,184 @@ +//===- CodeMetrics.cpp - Code cost measurements ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements code cost measurement utilities. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { + if (!F) return false; + + if (F->hasLocalLinkage()) return false; + + if (!F->hasName()) return false; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. 
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + return true; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || + Name == "exp2" || Name == "exp2l" || Name == "exp2f" || + Name == "floor" || Name == "floorf" || Name == "ceil" || + Name == "round" || Name == "ffs" || Name == "ffsl" || + Name == "abs" || Name == "labs" || Name == "llabs") + return true; + + return false; +} + +bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) { + if (isa(I)) + return true; + + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (const GetElementPtrInst *GEP = dyn_cast(I)) + return GEP->hasAllConstantIndices(); + + if (const IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't count as size. + return true; + } + } + + if (const CastInst *CI = dyn_cast(I)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa(CI) || isa(CI)) + return true; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (TD && isa(CI) && + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) + return true; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa(CI->getOperand(0))) + return true; + } + + return false; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, + const TargetData *TD) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isInstructionFree(II, TD)) + continue; + + // Special handling for calls. + if (isa(II) || isa(II)) { + ImmutableCallSite CS(cast(II)); + + if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). + if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + isRecursive = true; + } + + if (!isa(II) && !callIsSmall(CS.getCalledFunction())) { + // Each argument to a call takes on average one instruction to set up. + NumInsts += CS.arg_size(); + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. 
+ if (!isa(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + ++NumInsts; + } + + if (isa(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. + if (isa(BB->getTerminator())) + containsIndirectBr = true; + + // Remember NumInsts for this BB. + NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} + +void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { + // If this function contains a call that "returns twice" (e.g., setjmp or + // _setjmp) and it isn't marked with "returns twice" itself, never inline it. + // This is a hack because we depend on the user marking their local variables + // as volatile if they are live across a setjmp call, and they probably + // won't do this in callers. + exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && + !F->hasFnAttr(Attribute::ReturnsTwice); + + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB, TD); +} diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index df79849c3cf4..7a0a4e1e8246 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -26,6 +26,7 @@ #include "llvm/Operator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,42 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, if (C->isAllOnesValue() && !DestTy->isX86_MMXTy()) return Constant::getAllOnesValue(DestTy); + // Handle a vector->integer cast. + if (IntegerType *IT = dyn_cast(DestTy)) { + ConstantDataVector *CDV = dyn_cast(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + + unsigned NumSrcElts = CDV->getType()->getNumElements(); + + Type *SrcEltTy = CDV->getType()->getElementType(); + + // If the vector is a vector of floating point, convert it to vector of int + // to simplify things. + if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); + // Ask VMCore to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + CDV = cast(C); + } + + // Now that we know that the input value is a vector of integers, just shift + // and insert them into our result. 
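(Editorial aside, not part of the patch.) The shift-and-insert loop that follows the comment above packs the vector's integer lanes into one wide integer, taking lanes from the high end first and choosing the order from the target's endianness. A standalone sketch of the same packing for a <4 x i8> -> i32 case, using plain host integers rather than LLVM constants:

#include <cassert>
#include <cstdint>

// Shift the accumulator left by one lane and OR in the next lane, walking
// lanes back-to-front for little-endian targets and front-to-back otherwise.
static uint32_t packLanes(const uint8_t Lanes[4], bool LittleEndian) {
  uint32_t Result = 0;
  for (unsigned i = 0; i != 4; ++i) {
    Result <<= 8;
    Result |= LittleEndian ? Lanes[4 - i - 1] : Lanes[i];
  }
  return Result;
}

int main() {
  const uint8_t Lanes[4] = {0x01, 0x02, 0x03, 0x04};  // lane 0 .. lane 3
  assert(packLanes(Lanes, /*LittleEndian=*/true)  == 0x04030201u);
  assert(packLanes(Lanes, /*LittleEndian=*/false) == 0x01020304u);
  return 0;
}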
+ unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); + APInt Result(IT->getBitWidth(), 0); + for (unsigned i = 0; i != NumSrcElts; ++i) { + Result <<= BitShift; + if (TD.isLittleEndian()) + Result |= CDV->getElementAsInteger(NumSrcElts-i-1); + else + Result |= CDV->getElementAsInteger(i); + } + + return ConstantInt::get(IT, Result); + } + // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast(DestTy); if (DestVTy == 0) @@ -64,17 +101,16 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } // If this is a bitcast from constant vector -> vector, fold it. - ConstantVector *CV = dyn_cast(C); - if (CV == 0) + if (!isa(C) && !isa(C)) return ConstantExpr::getBitCast(C, DestTy); // If the element types match, VMCore can fold it. unsigned NumDstElt = DestVTy->getNumElements(); - unsigned NumSrcElt = CV->getNumOperands(); + unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - Type *SrcEltTy = CV->getType()->getElementType(); + Type *SrcEltTy = C->getType()->getVectorElementType(); Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which @@ -94,7 +130,6 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - if (!C) return ConstantExpr::getBitCast(C, DestTy); // Finally, VMCore can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); @@ -108,8 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - CV = dyn_cast(C); - if (!CV) // If VMCore wasn't able to fold it, bail out. + // If VMCore wasn't able to fold it, bail out. + if (!isa(C) && // FIXME: Remove ConstantVector. + !isa(C)) return C; } @@ -131,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, Constant *Elt = Zero; unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { - Constant *Src = dyn_cast(CV->getOperand(SrcElt++)); + Constant *Src =dyn_cast(C->getAggregateElement(SrcElt++)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); @@ -148,28 +184,29 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } Result.push_back(Elt); } - } else { - // Handle: bitcast (<2 x i64> to <4 x i32>) - unsigned Ratio = NumDstElt/NumSrcElt; - unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + return ConstantVector::get(Result); + } + + // Handle: bitcast (<2 x i64> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast(C->getAggregateElement(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); - // Loop over each source value, expanding into multiple results. - for (unsigned i = 0; i != NumSrcElt; ++i) { - Constant *Src = dyn_cast(CV->getOperand(i)); - if (!Src) // Reject constantexpr elements. - return ConstantExpr::getBitCast(C, DestTy); + unsigned ShiftAmt = isLittleEndian ? 
0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. + Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); - for (unsigned j = 0; j != Ratio; ++j) { - // Shift the piece of the value into the right place, depending on - // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; - - // Truncate and remember this piece. - Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); - } + // Truncate and remember this piece. + Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); } } @@ -272,7 +309,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return false; } - + if (ConstantStruct *CS = dyn_cast(C)) { const StructLayout *SL = TD.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); @@ -310,12 +347,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, // not reached. } - if (ConstantArray *CA = dyn_cast(C)) { - uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + if (isa(C) || isa(C) || + isa(C)) { + Type *EltTy = cast(C->getType())->getElementType(); + uint64_t EltSize = TD.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CA->getType()->getNumElements(); ++Index) { - if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, + uint64_t NumElts; + if (ArrayType *AT = dyn_cast(C->getType())) + NumElts = AT->getNumElements(); + else + NumElts = cast(C->getType())->getNumElements(); + + for (; Index != NumElts; ++Index) { + if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, TD)) return false; if (EltSize >= BytesLeft) @@ -327,30 +372,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, } return true; } - - if (ConstantVector *CV = dyn_cast(C)) { - uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); - uint64_t Index = ByteOffset / EltSize; - uint64_t Offset = ByteOffset - Index * EltSize; - for (; Index != CV->getType()->getNumElements(); ++Index) { - if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, - BytesLeft, TD)) - return false; - if (EltSize >= BytesLeft) - return true; - Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; - } - return true; - } - if (ConstantExpr *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr && CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) - return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, - BytesLeft, TD); + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); } // Otherwise, unknown initializer type. @@ -445,9 +472,9 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. 
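(Editorial aside, not part of the patch.) A concrete instance of the folding the comment above describes: loading the first four bytes of the constant string "abc" (including its NUL terminator) through an i32 pointer is replaced by a plain integer, whose value again depends on byte order. A small host-side illustration of the value such a fold produces:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const char Str[4] = {'a', 'b', 'c', '\0'};
  uint32_t AsInt = 0;
  std::memcpy(&AsInt, Str, sizeof(AsInt));  // reinterpret in host byte order

  // Little-endian hosts see 0x00636261 ('a' = 0x61 in the low byte);
  // big-endian hosts would see 0x61626300 instead.
  assert(AsInt == 0x00636261u || AsInt == 0x61626300u);
  return 0;
}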
- std::string Str; - if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { - unsigned StrLen = Str.length(); + StringRef Str; + if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) { + unsigned StrLen = Str.size(); Type *Ty = cast(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); // Replace load with immediate integer if the result is an integer or fp @@ -542,8 +569,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// explicitly cast them so that they aren't implicitly casted by the /// getelementptr. static Constant *CastGEPIndices(ArrayRef Ops, - Type *ResultTy, - const TargetData *TD) { + Type *ResultTy, const TargetData *TD, + const TargetLibraryInfo *TLI) { if (!TD) return 0; Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); @@ -568,7 +595,7 @@ static Constant *CastGEPIndices(ArrayRef Ops, Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; return C; } @@ -576,10 +603,11 @@ static Constant *CastGEPIndices(ArrayRef Ops, /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, - Type *ResultTy, - const TargetData *TD) { + Type *ResultTy, const TargetData *TD, + const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; - if (!TD || !cast(Ptr->getType())->getElementType()->isSized()) + if (!TD || !cast(Ptr->getType())->getElementType()->isSized() || + !Ptr->getType()->isPointerTy()) return 0; Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); @@ -602,7 +630,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, Res = ConstantExpr::getSub(Res, CE->getOperand(1)); Res = ConstantExpr::getIntToPtr(Res, ResultTy); if (ConstantExpr *ResCE = dyn_cast(Res)) - Res = ConstantFoldConstantExpression(ResCE, TD); + Res = ConstantFoldConstantExpression(ResCE, TD, TLI); return Res; } } @@ -729,7 +757,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, /// Note that this fails if not all of the operands are constant. Otherwise, /// this function can only fail when attempting to fold instructions like loads /// and stores, which have no constant expression form. -Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { +Constant *llvm::ConstantFoldInstruction(Instruction *I, + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast(I)) { Constant *CommonValue = 0; @@ -765,7 +795,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { if (const CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], - TD); + TD, TLI); if (const LoadInst *LI = dyn_cast(I)) return ConstantFoldLoadInst(LI, TD); @@ -781,28 +811,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { cast(EVI->getAggregateOperand()), EVI->getIndices()); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result is /// result is returned, if not, null is returned. 
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { SmallVector Ops; for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) { Constant *NewC = cast(*i); // Recursively fold the ConstantExpr's operands. if (ConstantExpr *NewCE = dyn_cast(NewC)) - NewC = ConstantFoldConstantExpression(NewCE, TD); + NewC = ConstantFoldConstantExpression(NewCE, TD, TLI); Ops.push_back(NewC); } if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], - TD); - return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD); + TD, TLI); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -817,7 +848,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, /// Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef Ops, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa(Ops[0]) || isa(Ops[1])) @@ -830,11 +862,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, switch (Opcode) { default: return 0; case Instruction::ICmp: - case Instruction::FCmp: assert(0 && "Invalid for compares"); + case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: if (Function *F = dyn_cast(Ops.back())) if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1)); + return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); return 0; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. 
This requires knowing @@ -888,9 +920,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, DestTy, TD)) + if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI)) return C; return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); @@ -903,7 +935,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, /// Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *Ops0, Constant *Ops1, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp (ptrtoint x), 0 -> icmp x, null // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y @@ -920,7 +953,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), IntPtrTy, false); Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } // Only do this transformation if the int is intptrty in size, otherwise @@ -929,7 +962,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, CE0->getType() == IntPtrTy) { Constant *C = CE0->getOperand(0); Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } } @@ -944,7 +977,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, IntPtrTy, false); Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), IntPtrTy, false); - return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI); } // Only do this transformation if the int is intptrty in size, otherwise @@ -953,7 +986,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, CE0->getType() == IntPtrTy && CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), - CE1->getOperand(0), TD); + CE1->getOperand(0), TD, TLI); } } @@ -962,13 +995,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { Constant *LHS = - ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD); + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1, + TD, TLI); Constant *RHS = - ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD); + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1, + TD, TLI); unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; - return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD); + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI); } } @@ -981,56 +1016,30 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, /// constant expression, or null if something is funny and we can't decide. 
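The (or X, Y) compare split in the hunk above relies on a simple boolean identity: comparing a bitwise-or against zero decomposes into two comparisons joined with and (for eq) or or (for ne). A standalone check of that identity over a few arbitrary test values (plain C++, independent of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Tests[] = {0u, 1u, 0x80u, 0xFFFFFFFFu};
  for (uint32_t X : Tests)
    for (uint32_t Y : Tests) {
      assert(((X | Y) == 0) == ((X == 0) && (Y == 0)));  // eq case -> And
      assert(((X | Y) != 0) == ((X != 0) || (Y != 0)));  // ne case -> Or
    }
  return 0;
}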
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { - if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) + if (!CE->getOperand(1)->isNullValue()) return 0; // Do not allow stepping over the value! - + // Loop over all of the operands, tracking down which value we are - // addressing... - gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); - for (++I; I != E; ++I) - if (StructType *STy = dyn_cast(*I)) { - ConstantInt *CU = cast(I.getOperand()); - assert(CU->getZExtValue() < STy->getNumElements() && - "Struct index out of range!"); - unsigned El = (unsigned)CU->getZExtValue(); - if (ConstantStruct *CS = dyn_cast(C)) { - C = CS->getOperand(El); - } else if (isa(C)) { - C = Constant::getNullValue(STy->getElementType(El)); - } else if (isa(C)) { - C = UndefValue::get(STy->getElementType(El)); - } else { - return 0; - } - } else if (ConstantInt *CI = dyn_cast(I.getOperand())) { - if (ArrayType *ATy = dyn_cast(*I)) { - if (CI->getZExtValue() >= ATy->getNumElements()) - return 0; - if (ConstantArray *CA = dyn_cast(C)) - C = CA->getOperand(CI->getZExtValue()); - else if (isa(C)) - C = Constant::getNullValue(ATy->getElementType()); - else if (isa(C)) - C = UndefValue::get(ATy->getElementType()); - else - return 0; - } else if (VectorType *VTy = dyn_cast(*I)) { - if (CI->getZExtValue() >= VTy->getNumElements()) - return 0; - if (ConstantVector *CP = dyn_cast(C)) - C = CP->getOperand(CI->getZExtValue()); - else if (isa(C)) - C = Constant::getNullValue(VTy->getElementType()); - else if (isa(C)) - C = UndefValue::get(VTy->getElementType()); - else - return 0; - } else { - return 0; - } - } else { - return 0; - } + // addressing. + for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { + C = C->getAggregateElement(CE->getOperand(i)); + if (C == 0) return 0; + } + return C; +} + +/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr +/// indices (with an *implied* zero pointer index that is not in the list), +/// return the constant value being addressed by a virtual load, or null if +/// something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, + ArrayRef Indices) { + // Loop over all of the operands, tracking down which value we are + // addressing. + for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + C = C->getAggregateElement(Indices[i]); + if (C == 0) return 0; + } return C; } @@ -1045,6 +1054,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { case Intrinsic::sqrt: + case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::bswap: case Intrinsic::ctpop: @@ -1115,7 +1125,6 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold float/double"); - return 0; // dummy return to suppress warning } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1132,7 +1141,6 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold float/double"); - return 0; // dummy return to suppress warning } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1143,11 +1151,8 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), /// available for the result. 
Returns null if the conversion cannot be /// performed, otherwise returns the Constant value resulting from the /// conversion. -static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, - Type *Ty) { - assert(Op && "Called with NULL operand"); - APFloat Val(Op->getValueAPF()); - +static Constant *ConstantFoldConvertToInt(const APFloat &Val, + bool roundTowardZero, Type *Ty) { // All of these conversion intrinsics form an integer of at most 64bits. unsigned ResultWidth = cast(Ty)->getBitWidth(); assert(ResultWidth <= 64 && @@ -1168,7 +1173,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. Constant * -llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { +llvm::ConstantFoldCall(Function *F, ArrayRef Operands, + const TargetLibraryInfo *TLI) { if (!F->hasName()) return 0; StringRef Name = F->getName(); @@ -1183,6 +1189,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); } + if (!TLI) + return 0; if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; @@ -1201,43 +1209,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { Op->getValueAPF().convertToDouble(); switch (Name[0]) { case 'a': - if (Name == "acos") + if (Name == "acos" && TLI->has(LibFunc::acos)) return ConstantFoldFP(acos, V, Ty); - else if (Name == "asin") + else if (Name == "asin" && TLI->has(LibFunc::asin)) return ConstantFoldFP(asin, V, Ty); - else if (Name == "atan") + else if (Name == "atan" && TLI->has(LibFunc::atan)) return ConstantFoldFP(atan, V, Ty); break; case 'c': - if (Name == "ceil") + if (Name == "ceil" && TLI->has(LibFunc::ceil)) return ConstantFoldFP(ceil, V, Ty); - else if (Name == "cos") + else if (Name == "cos" && TLI->has(LibFunc::cos)) return ConstantFoldFP(cos, V, Ty); - else if (Name == "cosh") + else if (Name == "cosh" && TLI->has(LibFunc::cosh)) return ConstantFoldFP(cosh, V, Ty); - else if (Name == "cosf") + else if (Name == "cosf" && TLI->has(LibFunc::cosf)) return ConstantFoldFP(cos, V, Ty); break; case 'e': - if (Name == "exp") + if (Name == "exp" && TLI->has(LibFunc::exp)) return ConstantFoldFP(exp, V, Ty); - if (Name == "exp2") { + if (Name == "exp2" && TLI->has(LibFunc::exp2)) { // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); } break; case 'f': - if (Name == "fabs") + if (Name == "fabs" && TLI->has(LibFunc::fabs)) return ConstantFoldFP(fabs, V, Ty); - else if (Name == "floor") + else if (Name == "floor" && TLI->has(LibFunc::floor)) return ConstantFoldFP(floor, V, Ty); break; case 'l': - if (Name == "log" && V > 0) + if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) return ConstantFoldFP(log, V, Ty); - else if (Name == "log10" && V > 0) + else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && (Ty->isFloatTy() || Ty->isDoubleTy())) { @@ -1248,21 +1256,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { } break; case 's': - if (Name == "sin") + if (Name == "sin" && TLI->has(LibFunc::sin)) return ConstantFoldFP(sin, V, Ty); - else if (Name == "sinh") + else if (Name == "sinh" && TLI->has(LibFunc::sinh)) return ConstantFoldFP(sinh, V, Ty); - else if (Name == "sqrt" && V >= 0) + else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sqrtf" && V >= 0) + else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sinf") + else if (Name == "sinf" && TLI->has(LibFunc::sinf)) return ConstantFoldFP(sin, V, Ty); break; case 't': - if (Name == "tan") + if (Name == "tan" && TLI->has(LibFunc::tan)) return ConstantFoldFP(tan, V, Ty); - else if (Name == "tanh") + else if (Name == "tanh" && TLI->has(LibFunc::tanh)) return ConstantFoldFP(tanh, V, Ty); break; default: @@ -1277,10 +1285,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); - case Intrinsic::cttz: - return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); - case Intrinsic::ctlz: - return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); case Intrinsic::convert_from_fp16: { APFloat Val(Op->getValue()); @@ -1300,24 +1304,31 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { } } - if (ConstantVector *Op = dyn_cast(Operands[0])) { + // Support ConstantVector in case we have an Undef in the top. 
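The libcall folds above are now conditional on the new TargetLibraryInfo parameter, so a call is only evaluated at compile time when the target is known to provide that library function. A minimal standalone sketch of that gating pattern, using plain C++ and hypothetical names rather than the LLVM TLI API:

#include <cmath>
#include <set>
#include <string>
#include <iostream>

struct LibraryInfo {
  std::set<std::string> Available;              // names the target provides
  bool has(const std::string &Name) const { return Available.count(Name) != 0; }
};

// Returns true and sets Result if the call can be folded, false otherwise.
bool foldUnaryLibCall(const LibraryInfo &TLI, const std::string &Name,
                      double Arg, double &Result) {
  if (!TLI.has(Name))
    return false;                               // target lacks the function
  if (Name == "sin") { Result = std::sin(Arg); return true; }
  if (Name == "cos") { Result = std::cos(Arg); return true; }
  if (Name == "sqrt" && Arg >= 0) { Result = std::sqrt(Arg); return true; }
  return false;                                 // unknown or unsafe to fold
}

int main() {
  LibraryInfo TLI{{"sin", "sqrt"}};
  double R;
  std::cout << foldUnaryLibCall(TLI, "sqrt", 2.0, R) << " " << R << "\n"; // 1 1.41421
  std::cout << foldUnaryLibCall(TLI, "cos", 0.0, R) << "\n";              // 0: not available
  return 0;
}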
+ if (isa(Operands[0]) || + isa(Operands[0])) { + Constant *Op = cast(Operands[0]); switch (F->getIntrinsicID()) { default: break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse2_cvtsd2si: case Intrinsic::x86_sse2_cvtsd2si64: - if (ConstantFP *FPOp = dyn_cast(Op->getOperand(0))) - return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty); + if (ConstantFP *FPOp = + dyn_cast_or_null(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/false, Ty); case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: - if (ConstantFP *FPOp = dyn_cast(Op->getOperand(0))) - return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty); + if (ConstantFP *FPOp = + dyn_cast_or_null(Op->getAggregateElement(0U))) + return ConstantFoldConvertToInt(FPOp->getValueAPF(), + /*roundTowardZero=*/true, Ty); } } - + if (isa(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::bswap) return Operands[0]; @@ -1337,16 +1348,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { if (ConstantFP *Op2 = dyn_cast(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - + double Op2V = Ty->isFloatTy() ? (double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); - if (Name == "pow") + if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if (Name == "fmod") + } + if (!TLI) + return 0; + if (Name == "pow" && TLI->has(LibFunc::pow)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + if (Name == "fmod" && TLI->has(LibFunc::fmod)) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if (Name == "atan2") + if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) @@ -1361,7 +1377,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { return 0; } - if (ConstantInt *Op1 = dyn_cast(Operands[0])) { if (ConstantInt *Op2 = dyn_cast(Operands[1])) { switch (F->getIntrinsicID()) { @@ -1375,7 +1390,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { APInt Res; bool Overflow; switch (F->getIntrinsicID()) { - default: assert(0 && "Invalid case"); + default: llvm_unreachable("Invalid case"); case Intrinsic::sadd_with_overflow: Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow); break; @@ -1401,6 +1416,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands) { }; return ConstantStruct::get(cast(F->getReturnType()), Ops); } + case Intrinsic::cttz: + // FIXME: This should check for Op2 == 1, and become unreachable if + // Op1 == 0. + return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); + case Intrinsic::ctlz: + // FIXME: This should check for Op2 == 1, and become unreachable if + // Op1 == 0. 
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp index bfa429d54120..85913b11bef4 100644 --- a/contrib/llvm/lib/Analysis/DIBuilder.cpp +++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp @@ -17,6 +17,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" using namespace llvm; @@ -76,10 +77,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RunTimeVer) { - assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89 - && "Invalid Language tag"); - assert (!Filename.empty() - && "Unable to create compile unit without filename"); + assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || + (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && + "Invalid Language tag"); + assert(!Filename.empty() && + "Unable to create compile unit without filename"); Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; TempEnumTypes = MDNode::getTemporary(VMContext, TElts); Value *THElts[] = { TempEnumTypes }; @@ -189,7 +191,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, Elts)); } -/// createQaulifiedType - Create debugging information entry for a qualified +/// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. @@ -358,13 +360,58 @@ DIType DIBuilder::createObjCIVar(StringRef Name, return DIType(MDNode::get(VMContext, Elts)); } +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, MDNode *PropertyNode) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + getNonCompileUnitScope(File), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + PropertyNode + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCProperty - Create debugging information entry for Objective-C +/// property. +DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, + DIFile File, unsigned LineNumber, + StringRef GetterName, + StringRef SetterName, + unsigned PropertyAttributes, + DIType Ty) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes), + Ty + }; + return DIObjCProperty(MDNode::get(VMContext, Elts)); +} + /// createClassType - Create debugging information entry for a class. 
DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, - MDNode *VTableHoder, MDNode *TemplateParams) { + MDNode *VTableHolder, MDNode *TemplateParams) { // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), @@ -379,7 +426,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DerivedFrom, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - VTableHoder, + VTableHolder, TemplateParams }; return DIType(MDNode::get(VMContext, Elts)); @@ -440,7 +487,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -465,7 +512,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -484,9 +531,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, ParameterTypes, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -500,7 +547,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - DIArray Elements) { + DIArray Elements) { // TAG_enumeration_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), @@ -512,7 +559,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -628,6 +675,31 @@ DIType DIBuilder::createTemporaryType(DIFile F) { return DIType(Node); } +/// createForwardDecl - Create a temporary forward-declared type that +/// can be RAUW'd if the full type is seen. +DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIFile F, + unsigned Line, unsigned RuntimeLang) { + // Create a temporary MDNode. 
+ Value *Elts[] = { + GetTagConstant(VMContext, Tag), + NULL, // TheCU + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + // To ease transition include sizes etc of 0. + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), + DIDescriptor::FlagFwdDecl), + NULL, + DIArray(), + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) + }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts); + return DIType(Node); +} + /// getOrCreateArray - Get a DIArray, create one if required. DIArray DIBuilder::getOrCreateArray(ArrayRef Elements) { if (Elements.empty()) { @@ -738,7 +810,7 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, Elts.push_back(MDString::get(VMContext, Name)); Elts.push_back(F); Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), - (LineNo | (ArgNo << 24)))); + (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); @@ -754,6 +826,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, @@ -777,13 +850,14 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, TParams, Decl, - THolder + THolder, + ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine) }; MDNode *Node = MDNode::get(VMContext, Elts); @@ -831,7 +905,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Fn, TParam, llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - THolder + THolder, + // FIXME: Do we want to use a different scope lines? + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; MDNode *Node = MDNode::get(VMContext, Elts); return DISubprogram(Node); @@ -854,7 +930,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, /// createLexicalBlockFile - This creates a new MDNode that encapsulates /// an existing scope with a new filename. 
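createForwardDecl above hands out a temporary, forward-declared node that is replaced (RAUW'd) once the full definition is seen. As a loose standalone analogy of that pattern (plain C++, invented structures, not the MDNode machinery):

#include <cassert>
#include <map>
#include <memory>
#include <string>

struct TypeNode {
  std::string Name;
  bool IsForwardDecl;
  const TypeNode *Resolved;            // filled in when the real type appears
  const TypeNode *resolve() const { return Resolved ? Resolved : this; }
};

struct TypeTable {
  std::map<std::string, std::unique_ptr<TypeNode>> Nodes;

  TypeNode *getForwardDecl(const std::string &Name) {
    std::unique_ptr<TypeNode> &Slot = Nodes[Name];
    if (!Slot)
      Slot.reset(new TypeNode{Name, /*IsForwardDecl=*/true, nullptr});
    return Slot.get();
  }

  // Analogous to RAUW: every holder of the placeholder now sees Def.
  void define(const std::string &Name, const TypeNode *Def) {
    getForwardDecl(Name)->Resolved = Def;
  }
};

int main() {
  TypeTable Table;
  TypeNode Def{"struct S", /*IsForwardDecl=*/false, nullptr};
  const TypeNode *Early = Table.getForwardDecl("struct S"); // used before definition
  Table.define("struct S", &Def);
  assert(Early->resolve() == &Def);
  return 0;
}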
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, - DIFile File) { + DIFile File) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), Scope, diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp index 44457d3c3de9..f61a8f3a5eb9 100644 --- a/contrib/llvm/lib/Analysis/DebugInfo.cpp +++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp @@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - if (ConstantInt *CI = dyn_cast(DbgNode->getOperand(Elt))) + if (ConstantInt *CI = dyn_cast_or_null(DbgNode->getOperand(Elt))) return CI->getZExtValue(); return 0; @@ -289,6 +289,10 @@ bool DIDescriptor::isEnumerator() const { return DbgNode && getTag() == dwarf::DW_TAG_enumerator; } +/// isObjCProperty - Return true if the specified tag is DW_TAG +bool DIDescriptor::isObjCProperty() const { + return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; +} //===----------------------------------------------------------------------===// // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// @@ -373,6 +377,19 @@ bool DICompileUnit::Verify() const { return true; } +/// Verify - Verify that an ObjC property is well formed. +bool DIObjCProperty::Verify() const { + if (!DbgNode) + return false; + unsigned Tag = getTag(); + if (Tag != dwarf::DW_TAG_APPLE_property) return false; + DIType Ty = getType(); + if (!Ty.Verify()) return false; + + // Don't worry about the rest of the strings for now. + return true; +} + /// Verify - Verify that a type descriptor is well formed. bool DIType::Verify() const { if (!DbgNode) @@ -482,6 +499,7 @@ bool DINameSpace::Verify() const { /// return base type size. uint64_t DIDerivedType::getOriginalTypeSize() const { unsigned Tag = getTag(); + if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || Tag == dwarf::DW_TAG_restrict_type) { @@ -490,7 +508,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { // approach. if (!BaseType.isValid()) return getSizeInBits(); - if (BaseType.isDerivedType()) + // If this is a derived type, go ahead and get the base type, unless + // it's a reference then it's just the size of the field. Pointer types + // have no need of this since they're a different type of qualification + // on the type. + if (BaseType.getTag() == dwarf::DW_TAG_reference_type) + return getSizeInBits(); + else if (BaseType.isDerivedType()) return DIDerivedType(BaseType).getOriginalTypeSize(); else return BaseType.getSizeInBits(); @@ -499,6 +523,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { return getSizeInBits(); } +/// getObjCProperty - Return property node, if this ivar is associated with one. +MDNode *DIDerivedType::getObjCProperty() const { + if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10) + return NULL; + return dyn_cast_or_null(DbgNode->getOperand(10)); +} + /// isInlinedFnArgument - Return true if this variable provides debugging /// information for an inlined function arguments. 
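The getOriginalTypeSize change above keeps unwrapping typedef/const-style wrappers but now stops at reference types, which occupy only pointer-sized storage in the containing object. A simplified standalone sketch of that rule (plain C++, invented node layout, not the DIDerivedType API):

#include <cassert>
#include <cstdint>

enum class Kind { Base, Typedef, Const, Reference };

struct TypeDesc {
  Kind K;
  uint64_t SizeInBits;        // size recorded on this node itself
  const TypeDesc *BaseType;   // underlying type, if any
};

uint64_t originalTypeSize(const TypeDesc &T) {
  if (T.K == Kind::Typedef || T.K == Kind::Const) {
    if (!T.BaseType)
      return T.SizeInBits;                 // incomplete chain: best effort
    if (T.BaseType->K == Kind::Reference)
      return T.SizeInBits;                 // a reference field stays pointer-sized
    return originalTypeSize(*T.BaseType);  // otherwise keep unwrapping
  }
  return T.SizeInBits;
}

int main() {
  TypeDesc Int{Kind::Base, 32, nullptr};
  TypeDesc ConstInt{Kind::Const, 0, &Int};
  TypeDesc IntRef{Kind::Reference, 64, &Int};
  TypeDesc TypedefRef{Kind::Typedef, 64, &IntRef};
  assert(originalTypeSize(ConstInt) == 32);    // unwraps to the base int
  assert(originalTypeSize(TypedefRef) == 64);  // stops at the reference
  return 0;
}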
bool DIVariable::isInlinedFnArgument(const Function *CurFn) { @@ -565,8 +596,7 @@ StringRef DIScope::getFilename() const { return DIType(DbgNode).getFilename(); if (isFile()) return DIFile(DbgNode).getFilename(); - assert(0 && "Invalid DIScope!"); - return StringRef(); + llvm_unreachable("Invalid DIScope!"); } StringRef DIScope::getDirectory() const { @@ -586,8 +616,7 @@ StringRef DIScope::getDirectory() const { return DIType(DbgNode).getDirectory(); if (isFile()) return DIFile(DbgNode).getDirectory(); - assert(0 && "Invalid DIScope!"); - return StringRef(); + llvm_unreachable("Invalid DIScope!"); } DIArray DICompileUnit::getEnumTypes() const { @@ -631,6 +660,32 @@ DIArray DICompileUnit::getGlobalVariables() const { return DIArray(); } +//===----------------------------------------------------------------------===// +// DIDescriptor: vtable anchors for all descriptors. +//===----------------------------------------------------------------------===// + +void DIScope::anchor() { } + +void DICompileUnit::anchor() { } + +void DIFile::anchor() { } + +void DIType::anchor() { } + +void DIBasicType::anchor() { } + +void DIDerivedType::anchor() { } + +void DICompositeType::anchor() { } + +void DISubprogram::anchor() { } + +void DILexicalBlock::anchor() { } + +void DINameSpace::anchor() { } + +void DILexicalBlockFile::anchor() { } + //===----------------------------------------------------------------------===// // DIDescriptor: dump routines for all descriptors. //===----------------------------------------------------------------------===// @@ -679,8 +734,13 @@ void DIType::print(raw_ostream &OS) const { if (isBasicType()) DIBasicType(DbgNode).print(OS); - else if (isDerivedType()) - DIDerivedType(DbgNode).print(OS); + else if (isDerivedType()) { + DIDerivedType DTy = DIDerivedType(DbgNode); + DTy.print(OS); + DICompositeType CTy = getDICompositeType(DTy); + if (CTy.Verify()) + CTy.print(OS); + } else if (isCompositeType()) DICompositeType(DbgNode).print(OS); else { @@ -698,7 +758,9 @@ void DIBasicType::print(raw_ostream &OS) const { /// print - Print derived type. void DIDerivedType::print(raw_ostream &OS) const { - OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS); + OS << "\n\t Derived From: "; + getTypeDerivedFrom().print(OS); + OS << "\n\t"; } /// print - Print composite type. @@ -725,6 +787,9 @@ void DISubprogram::print(raw_ostream &OS) const { if (isDefinition()) OS << " [def] "; + if (getScopeLineNumber() != getLineNumber()) + OS << " [Scope: " << getScopeLineNumber() << "] "; + OS << "\n"; } @@ -927,9 +992,30 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { /// processModule - Process entire module and collect debug info. 
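The empty anchor() definitions added above follow the usual key-function idiom: declaring one out-of-line virtual member pins a class's vtable and RTTI to a single object file instead of emitting weak copies in every translation unit that includes the header. A minimal illustration (class names are illustrative, not the DIDescriptor hierarchy):

// --- would live in the header -------------------------------------------
struct Descriptor {
  virtual ~Descriptor() {}
  virtual void anchor();   // deliberately not defined inline
};

struct ScopeDescriptor : Descriptor {
  virtual void anchor();   // each subclass gets its own key function
};

// --- would live in exactly one .cpp file --------------------------------
void Descriptor::anchor() {}
void ScopeDescriptor::anchor() {}

int main() {
  ScopeDescriptor S;
  Descriptor *D = &S;      // virtual dispatch works as usual
  D->anchor();
  return 0;
}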
void DebugInfoFinder::processModule(Module &M) { - if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) - addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i))); + if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CU(CU_Nodes->getOperand(i)); + addCompileUnit(CU); + if (CU.getVersion() > LLVMDebugVersion10) { + DIArray GVs = CU.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(GVs.getElement(i)); + if (addGlobalVariable(DIG)) + processType(DIG.getType()); + } + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + processSubprogram(DISubprogram(SPs.getElement(i))); + DIArray EnumTypes = CU.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + processType(DIType(EnumTypes.getElement(i))); + DIArray RetainedTypes = CU.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + processType(DIType(RetainedTypes.getElement(i))); + return; + } + } + } for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp index 6de4e1e1d7de..1604576ec4ae 100644 --- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -35,6 +35,8 @@ namespace { }; } +void DominanceFrontier::anchor() { } + const DominanceFrontier::DomSetType & DominanceFrontier::calculate(const DominatorTree &DT, const DomTreeNode *Node) { diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp index 2e79eab51ff7..0df3e8a38218 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -127,16 +127,9 @@ class BasicCallGraph : public ModulePass, public CallGraph { } } - // Loop over all of the users of the function, looking for non-call uses. - for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){ - User *U = *I; - if ((!isa(U) && !isa(U)) - || !CallSite(cast(U)).isCallee(I)) { - // Not a call, or being used as a parameter rather than as the callee. - ExternalCallingNode->addCalledFunction(CallSite(), Node); - break; - } - } + // If this function has its address taken, anything could call it. + if (F->hasAddressTaken()) + ExternalCallingNode->addCalledFunction(CallSite(), Node); // If this function is not defined in this translation unit, it could call // anything. 
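The CallGraph change above replaces the per-use scan with a single hasAddressTaken() query: once a function's address escapes (stored, passed as data, compared), anything could call it, so it gets an edge from the external calling node. A toy standalone sketch of what "address taken" means in that sense (plain C++, invented Use/Function records, not the llvm::Value use list):

#include <cassert>
#include <vector>

struct Use {
  bool IsCall;              // the user is a call or invoke instruction
  bool IsCalleeOperand;     // and this use is its callee slot
};

struct Function {
  std::vector<Use> Uses;

  bool hasAddressTaken() const {
    for (const Use &U : Uses)
      if (!U.IsCall || !U.IsCalleeOperand)
        return true;        // stored, passed as an argument, compared, ...
    return false;
  }
};

int main() {
  Function OnlyCalled;
  OnlyCalled.Uses = {{true, true}, {true, true}};
  Function Escapes;
  Escapes.Uses = {{true, true}, {false, false}};   // e.g. stored into a table
  assert(!OnlyCalled.hasAddressTaken());
  assert(Escapes.hasAddressTaken());
  return 0;
}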
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp index b226d66cd78a..c1d8e3e65a62 100644 --- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -467,6 +468,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } else if (isMalloc(&cast(*II)) || isFreeCall(&cast(*II))) { FunctionEffect |= ModRef; + } else if (IntrinsicInst *Intrinsic = dyn_cast(&*II)) { + // The callgraph doesn't include intrinsic calls. + Function *Callee = Intrinsic->getCalledFunction(); + ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee); + FunctionEffect |= (Behaviour & ModRef); } if ((FunctionEffect & Mod) == 0) diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index d0ca8920ab9f..b80966b65a17 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -79,10 +79,44 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, return false; } -/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// Return true if all loop headers that dominate this block are in simplified +/// form. +static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, + const LoopInfo *LI, + SmallPtrSet &SimpleLoopNests) { + Loop *NearestLoop = 0; + for (DomTreeNode *Rung = DT->getNode(BB); + Rung; Rung = Rung->getIDom()) { + BasicBlock *DomBB = Rung->getBlock(); + Loop *DomLoop = LI->getLoopFor(DomBB); + if (DomLoop && DomLoop->getHeader() == DomBB) { + // If the domtree walk reaches a loop with no preheader, return false. + if (!DomLoop->isLoopSimplifyForm()) + return false; + // If we have already checked this loop nest, stop checking. + if (SimpleLoopNests.count(DomLoop)) + break; + // If we have not already checked this loop nest, remember the loop + // header nearest to BB. The nearest loop may not contain BB. + if (!NearestLoop) + NearestLoop = DomLoop; + } + } + if (NearestLoop) + SimpleLoopNests.insert(NearestLoop); + return true; +} + +/// AddUsersImpl - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. -bool IVUsers::AddUsersIfInteresting(Instruction *I) { +bool IVUsers::AddUsersImpl(Instruction *I, + SmallPtrSet &SimpleLoopNests) { + // Add this IV user to the Processed set before returning false to ensure that + // all IV users are members of the set. See IVUsers::isIVUserOrOperand. + if (!Processed.insert(I)) + return true; // Instruction already handled. + if (!SE->isSCEVable(I->getType())) return false; // Void and FP expressions cannot be reduced. @@ -93,9 +127,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (Width > 64 || (TD && !TD->isLegalInteger(Width))) return false; - if (!Processed.insert(I)) - return true; // Instruction already handled. - // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); @@ -115,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (isa(User) && Processed.count(User)) continue; + // Only consider IVUsers that are dominated by simplified loop + // headers. 
Otherwise, SCEVExpander will crash. + BasicBlock *UseBB = User->getParent(); + // A phi's use is live out of its predecessor block. + if (PHINode *PHI = dyn_cast(User)) { + unsigned OperandNo = UI.getOperandNo(); + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + UseBB = PHI->getIncomingBlock(ValNo); + } + if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests)) + return false; + // Descend recursively, but not into PHI nodes outside the current loop. // It's important to see the entire expression outside the loop to get // choices that depend on addressing mode use right, although we won't @@ -124,12 +167,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { if (isa(User) || Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersImpl(User, SimpleLoopNests)) { DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } - } else if (Processed.count(User) || !AddUsersIfInteresting(User)) { + } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) { DEBUG(dbgs() << "FOUND USER: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; @@ -153,6 +196,15 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; } +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + // SCEVExpander can only handle users that are dominated by simplified loop + // entries. Keep track of all loops that are only dominated by other simple + // loops so we don't traverse the domtree for each user. + SmallPtrSet SimpleLoopNests; + + return AddUsersImpl(I, SimpleLoopNests); +} + IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { IVUses.push_back(new IVStrideUse(this, User, Operand)); return IVUses.back(); @@ -268,6 +320,7 @@ void IVStrideUse::transformToPostInc(const Loop *L) { void IVStrideUse::deleted() { // Remove this user from the list. + Parent->Processed.erase(this->getUser()); Parent->IVUses.erase(this); // this now dangles! } diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index e12e322c2a99..3e3d2ab75380 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -11,645 +11,1012 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/CallingConv.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +#include "llvm/GlobalAlias.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? 
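isSimplifiedLoopNest above walks dominating loop headers, fails if any enclosing loop lacks simplified (preheader) form, and caches nests it has already validated so the walk is not repeated per user. A much-simplified standalone sketch of that walk (plain C++, a parent chain instead of the dominator tree, invented Loop record):

#include <cassert>
#include <set>

struct Loop {
  const Loop *Parent;       // next enclosing loop, null at the top
  bool IsSimplified;        // has a preheader, dedicated exits, ...
};

bool isSimplifiedLoopNest(const Loop *Innermost,
                          std::set<const Loop *> &CheckedNests) {
  for (const Loop *L = Innermost; L; L = L->Parent) {
    if (!L->IsSimplified)
      return false;                      // an enclosing loop is not canonical
    if (CheckedNests.count(L))
      return true;                       // the rest was validated earlier
  }
  if (Innermost)
    CheckedNests.insert(Innermost);      // cache the nest we just walked
  return true;
}

int main() {
  Loop Outer{nullptr, true};
  Loop Inner{&Outer, true};
  Loop Broken{&Outer, false};
  std::set<const Loop *> Cache;
  assert(isSimplifiedLoopNest(&Inner, Cache));
  assert(Cache.count(&Inner));
  assert(!isSimplifiedLoopNest(&Broken, Cache));
  return 0;
}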
-bool llvm::callIsSmall(const Function *F) { - if (!F) return false; +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); - if (F->hasLocalLinkage()) return false; +namespace { - if (!F->hasName()) return false; +class CallAnalyzer : public InstVisitor { + typedef InstVisitor Base; + friend class InstVisitor; - StringRef Name = F->getName(); + // TargetData if available, or null. + const TargetData *const TD; - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + // The called function. + Function &F; + + int Threshold; + int Cost; + const bool AlwaysInline; + + bool IsRecursive; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap > ConstantOffsetPtrs; + + // Custom simplification helper routines. + bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap::iterator &CostIt); + void disableSROA(DenseMap::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. + void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. 
+ bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitICmp(ICmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitCallSite(CallSite CS); + +public: + CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) + : TD(TD), F(Callee), Threshold(Threshold), Cost(0), + AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), + IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), + NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { + } + + bool analyzeCall(CallSite CS); + + int getThreshold() { return Threshold; } + int getCost() { return Cost; } + + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. + unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; + + void dump(); +}; + +} // namespace + +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); +} + +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; + + DenseMap::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; + + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); +} + +/// \brief Disable SROA for the candidate marked by this cost iterator. +/// +/// This markes the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} + +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. +void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} + +/// \brief Accumulate the given cost for a particular SROA candidate. +void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. 
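The SROA bookkeeping declared above accumulates, per caller alloca, the savings from instructions that would vanish if SROA fires, and refunds those savings onto the running inline cost the moment something rules SROA out. A standalone sketch of just that accounting (plain C++, illustrative ids and cost units):

#include <cassert>
#include <map>

struct InlineCostState {
  int Cost = 0;
  int SROACostSavings = 0;
  std::map<int, int> SROAArgCosts;   // alloca id -> accumulated savings

  void accumulateSROACost(int AllocaId, int InstructionCost) {
    SROAArgCosts[AllocaId] += InstructionCost;
    SROACostSavings += InstructionCost;
  }

  void disableSROA(int AllocaId) {
    std::map<int, int>::iterator It = SROAArgCosts.find(AllocaId);
    if (It == SROAArgCosts.end())
      return;
    Cost += It->second;              // the savings never materialize
    SROACostSavings -= It->second;
    SROAArgCosts.erase(It);
  }
};

int main() {
  InlineCostState S;
  S.accumulateSROACost(/*AllocaId=*/0, 5);
  S.accumulateSROACost(/*AllocaId=*/0, 5);
  assert(S.SROACostSavings == 10 && S.Cost == 0);
  S.disableSROA(0);                  // e.g. the pointer escaped
  assert(S.SROACostSavings == 0 && S.Cost == 10);
  return 0;
}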
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); + return true; + } + + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa(*I) && !SimplifiedValues.lookup(*I)) + return false; + + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast(SimpleOp); + if (!OpC) + return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. + + // We will happily inline static alloca instructions or dynamic alloca + // instructions in always-inline situations. + if (AlwaysInline || I.isStaticAlloca()) + return Base::visitAlloca(I); + + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; +} + +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. + + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. + + // Phi nodes are always zero-cost. + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. 
+ Value *Ptr = I.getPointerOperand(); + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; + } + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also handle SROA candidates here, we already know that the GEP is + // all-constant indexed. + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + return true; + } + } + + if (isGEPOffsetConstant(I)) { + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + // Constant GEPs are modeled as free. + return true; + } + + // Variable GEPs will require math and will disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; +} + +bool CallAnalyzer::visitBitCast(BitCastInst &I) { + // Propagate constants through bitcasts. + if (Constant *COp = dyn_cast(I.getOperand(0))) + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offsets through casts + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + // Casts don't change the offset, just wrap it up. + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also look for SROA candidates here. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // Bitcasts are always zero cost. + return true; +} + +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + // Propagate constants through ptrtoint. + if (Constant *COp = dyn_cast(I.getOperand(0))) + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when converted to a plain integer provided the + // integer is large enough to represent the pointer. + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // This is really weird. Technically, ptrtoint will disable SROA. However, + // unless that ptrtoint is *used* somewhere in the live basic blocks after + // inlining, it will be nuked, and SROA should proceed. All of the uses which + // would block SROA would also block SROA if applied directly to a pointer, + // and so we can just add the integer in here. The only places where SROA is + // preserved either cannot fire on an integer, or won't in-and-of themselves + // disable SROA (ext) w/o some later use that we would see and disable. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // A ptrtoint cast is free so long as the result is large enough to store the + // pointer, and a legal integer type. + return TD && TD->isLegalInteger(IntegerSize) && + IntegerSize >= TD->getPointerSizeInBits(); +} + +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { + // Propagate constants through ptrtoint. 
+ if (Constant *COp = dyn_cast(I.getOperand(0))) + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when round-tripped through a pointer without + // modifications provided the integer is not too large. + Value *Op = I.getOperand(0); + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // "Propagate" SROA here in the same manner as we do for ptrtoint above. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // An inttoptr cast is free so long as the input is a legal integer type + // which doesn't contain values outside the range of a pointer. + return TD && TD->isLegalInteger(IntegerSize) && + IntegerSize <= TD->getPointerSizeInBits(); +} + +bool CallAnalyzer::visitCastInst(CastInst &I) { + // Propagate constants through ptrtoint. + if (Constant *COp = dyn_cast(I.getOperand(0))) + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + disableSROA(I.getOperand(0)); + + // No-op casts don't have any cost. + if (I.isLosslessCast()) return true; - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (TD && isa(I) && + TD->isLegalInteger(TD->getTypeSizeInBits(I.getType()))) return true; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // no-ops on most sane targets. + if (isa(I.getOperand(0))) + return true; + + // Assume the rest of the casts require work. + return false; +} + +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { + Value *Operand = I.getOperand(0); + Constant *Ops[1] = { dyn_cast(Operand) }; + if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), + Ops, TD)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on the argument to arbitrary unary operators. + disableSROA(Operand); + return false; } -/// analyzeBasicBlock - Fill in the current structure with information gleaned -/// from the specified block. -void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetData *TD) { - ++NumBlocks; - unsigned NumInstsBeforeThisBB = NumInsts; - for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); - II != E; ++II) { - if (isa(II)) continue; // PHI nodes don't count. - - // Special handling for calls. - if (isa(II) || isa(II)) { - if (isa(II)) - continue; // Debug intrinsics don't count as size. 
- - ImmutableCallSite CS(cast(II)); - - if (const Function *F = CS.getCalledFunction()) { - // If a function is both internal and has a single use, then it is - // extremely likely to get inlined in the future (it was probably - // exposed by an interleaved devirtualization pass). - if (F->hasInternalLinkage() && F->hasOneUse()) - ++NumInlineCandidates; - - // If this call is to function itself, then the function is recursive. - // Inlining it into other functions is a bad idea, because this is - // basically just a form of loop peeling, and our metrics aren't useful - // for that case. - if (F == BB->getParent()) - isRecursive = true; +bool CallAnalyzer::visitICmp(ICmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + // First try to handle simplified comparisons. + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CRHS = dyn_cast(RHS)) + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; } - if (!isa(II) && !callIsSmall(CS.getCalledFunction())) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); - - // We don't want inline asm to count as a call - that would prevent loop - // unrolling. The argument setup cost is still real, though. - if (!isa(CS.getCalledValue())) - ++NumCalls; - } - } - - if (const AllocaInst *AI = dyn_cast(II)) { - if (!AI->isStaticAlloca()) - this->usesDynamicAlloca = true; - } - - if (isa(II) || II->getType()->isVectorTy()) - ++NumVectorInsts; - - if (const CastInst *CI = dyn_cast(II)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast() || isa(CI) || - isa(CI)) - continue; - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (isa(CI) && TD && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - continue; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa(CI->getOperand(0))) - continue; - } else if (const GetElementPtrInst *GEPI = dyn_cast(II)){ - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (GEPI->hasAllConstantIndices()) - continue; - } - - ++NumInsts; - } - - if (isa(BB->getTerminator())) - ++NumRets; - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this indirect - // jump would jump from the inlined copy of the function into the original - // function which is extremely undefined behavior. - if (isa(BB->getTerminator())) - containsIndirectBr = true; - - // Remember NumInsts for this BB. - NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; -} - -// CountCodeReductionForConstant - Figure out an approximation for how many -// instructions will be constant folded if the specified value is constant. -// -unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (isa(U) || isa(U)) { - // We will be able to eliminate all but one of the successors. 
- const TerminatorInst &TI = cast(*U); - const unsigned NumSucc = TI.getNumSuccessors(); - unsigned Instrs = 0; - for (unsigned I = 0; I != NumSucc; ++I) - Instrs += NumBBInsts[TI.getSuccessor(I)]; - // We don't know which blocks will be eliminated, so use the average size. - Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - } else { - // Figure out if this instruction will be removed due to simple constant - // propagation. - Instruction &Inst = cast(*U); - - // We can't constant propagate instructions which have effects or - // read memory. - // - // FIXME: It would be nice to capture the fact that a load from a - // pointer-to-constant-global is actually a *really* good thing to zap. - // Unfortunately, we don't know the pointer that may get propagated here, - // so we can't make this decision. - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || - isa(Inst)) - continue; - - bool AllOperandsConstant = true; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) - if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != V) { - AllOperandsConstant = false; - break; - } - - if (AllOperandsConstant) { - // We will get to remove this instruction... - Reduction += InlineConstants::InstrCost; - - // And any other instructions that use it which become constants - // themselves. - Reduction += CountCodeReductionForConstant(&Inst); + // Otherwise look for a comparison between constant offset pointers with + // a common base. + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the icmp to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrCmps; + return true; } } } - return Reduction; + + // If the comparison is an equality comparison with null, we can simplify it + // for any alloca-derived argument. + if (I.isEquality() && isa(I.getOperand(1))) + if (isAllocaDerivedArg(I.getOperand(0))) { + // We can actually predict the result of comparisons between an + // alloca-derived value and null. Note that this fires regardless of + // SROA firing. + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; + SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) + : ConstantInt::getFalse(I.getType()); + return true; + } + + // Finally check for SROA candidates in comparisons. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (isa(I.getOperand(1))) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; } -// CountCodeReductionForAlloca - Figure out an approximation of how much smaller -// the function will be if it is inlined into a context where an argument -// becomes an alloca. 
-// -unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { - if (!V->getType()->isPointerTy()) return 0; // Not a pointer - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *I = cast(*UI); - if (isa(I) || isa(I)) - Reduction += InlineConstants::InstrCost; - else if (GetElementPtrInst *GEP = dyn_cast(I)) { - // If the GEP has variable indices, we won't be able to do much with it. - if (GEP->hasAllConstantIndices()) - Reduction += CountCodeReductionForAlloca(GEP); - } else if (BitCastInst *BCI = dyn_cast(I)) { - // Track pointer through bitcasts. - Reduction += CountCodeReductionForAlloca(BCI); - } else { - // If there is some other strange instruction, we're not going to be able - // to do much if we inline this. - return 0; +bool CallAnalyzer::visitSub(BinaryOperator &I) { + // Try to handle a special case: we can fold computing the difference of two + // constant-related pointers. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the subtract to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrDiffs; + return true; + } } } - return Reduction; + // Otherwise, fall back to the generic logic for simplifying and handling + // instructions. + return Base::visitSub(I); } -/// analyzeFunction - Fill in the current structure with information gleaned -/// from the specified function. -void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { - // If this function contains a call to setjmp or _setjmp, never inline - // it. This is a hack because we depend on the user marking their local - // variables as volatile if they are live across a setjmp call, and they - // probably won't do this in callers. - if (F->callsFunctionThatReturnsTwice()) - callsSetJmp = true; +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + if (Constant *C = dyn_cast_or_null(SimpleV)) { + SimplifiedValues[&I] = C; + return true; + } - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. + disableSROA(LHS); + disableSROA(RHS); + + return false; } -/// analyzeFunction - Fill in the current structure with information gleaned -/// from the specified function. 
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, - const TargetData *TD) { - Metrics.analyzeFunction(F, TD); +bool CallAnalyzer::visitLoad(LoadInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } - // A function with exactly one return has it removed during the inlining - // process (see InlineFunction), so don't count it. - // FIXME: This knowledge should really be encoded outside of FunctionInfo. - if (Metrics.NumRets==1) - --Metrics.NumInsts; + disableSROA(CostIt); + } - // Check out all of the arguments to the function, figuring out how much - // code can be eliminated if one of the arguments is a constant. - ArgumentWeights.reserve(F->arg_size()); - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), - Metrics.CountCodeReductionForAlloca(I))); + return false; } -/// NeverInline - returns true if the function should never be inlined into -/// any caller -bool InlineCostAnalyzer::FunctionInfo::NeverInline() { - return (Metrics.callsSetJmp || Metrics.isRecursive || - Metrics.containsIndirectBr); +bool CallAnalyzer::visitStore(StoreInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; } -// getSpecializationBonus - The heuristic used to determine the per-call -// performance boost for using a specialization of Callee with argument -// specializedArgNo replaced by a constant. -int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, - SmallVectorImpl &SpecializedArgNos) -{ - if (Callee->mayBeOverridden()) + +bool CallAnalyzer::visitCallSite(CallSite CS) { + if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && + !F.hasFnAttr(Attribute::ReturnsTwice)) { + // This aborts the entire analysis. + ExposesReturnsTwice = true; + return false; + } + + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // SROA can usually chew through these intrinsics and they have no cost + // so don't pay the price of analyzing them in detail. + return true; + } + } + + if (Function *F = CS.getCalledFunction()) { + if (F == CS.getInstruction()->getParent()->getParent()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursive = true; + return false; + } + + if (!callIsSmall(F)) { + // We account for the average 1 instruction per call argument setup + // here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Everything other than inline ASM will also have a significant cost + // merely from making the call. 
+ if (!isa(CS.getCalledValue()))
+ Cost += InlineConstants::CallPenalty;
+ }
+
+ return Base::visitCallSite(CS);
+ }
+
+ // Otherwise we're in a very special case -- an indirect function call. See
+ // if we can be particularly clever about this.
+ Value *Callee = CS.getCalledValue();
+
+ // First, pay the price of the argument setup. We account for the average
+ // 1 instruction per call argument setup here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Next, check if this happens to be an indirect function call to a known
+ // function in this inline context. If not, we've done all we can.
+ Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee));
+ if (!F)
+ return Base::visitCallSite(CS);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan
+ // out. Pretend to inline the function, with a custom threshold.
+ CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
+ if (CA.analyzeCall(CS)) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ }
+
+ return Base::visitCallSite(CS);
+}
+
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+ // We found something we don't understand or can't handle. Mark any SROA-able
+ // values in the operand list as no longer viable.
+ for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+ disableSROA(*OI);
+
+ return false;
+}
+
+
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible to inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
+ I != E; ++I) {
+ ++NumInstructions;
+ if (isa(I) || I->getType()->isVectorTy())
+ ++NumVectorInstructions;
+
+ // If the instruction simplified to a constant, there is no cost to this
+ // instruction. Visit the instructions using our InstVisitor to account for
+ // all of the per-instruction logic. The visit tree returns true if we
+ // consumed the instruction in any way, and false if the instruction's base
+ // cost should count against inlining.
+ if (Base::visit(I))
+ ++NumInstructionsSimplified;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // If visiting this instruction detected an uninlinable pattern, abort.
+ if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ if (NumVectorInstructions > NumInstructions/2)
+ VectorBonus = FiftyPercentVectorBonus;
+ else if (NumVectorInstructions > NumInstructions/10)
+ VectorBonus = TenPercentVectorBonus;
+ else
+ VectorBonus = 0;
+
+ // Check if we've passed the threshold so we don't spin in huge basic
+ // blocks that will never inline.
+ if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ return false;
+ }
+
+ return true;
+}
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+ if (!TD || !V->getType()->isPointerTy())
 return 0;
- int Bonus = 0;
- // If this function uses the coldcc calling convention, prefer not to
- // specialize it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- Bonus -= InlineConstants::ColdccPenalty;
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- unsigned ArgNo = 0;
- unsigned i = 0;
- for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
- I != E; ++I, ++ArgNo)
- if (ArgNo == SpecializedArgNos[i]) {
- ++i;
- Bonus += CountBonusForConstant(I);
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+ return 0;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
 }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
- // Calls usually take a long time, so they make the specialization gain
- // smaller.
- Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
-
- return Bonus;
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ return cast(ConstantInt::get(IntPtrTy, Offset));
 }
-// ConstantFunctionBonus - Figure out how much of a bonus we can get for
-// possibly devirtualizing a function. We'll subtract the size of the function
-// we may wish to inline from the indirect call bonus providing a limit on
-// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
-// inlining because we decide we don't want to give a bonus for
-// devirtualizing.
-int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+ ++NumCallsAnalyzed;
- // This could just be NULL.
- if (!C) return 0;
+ // Track whether the post-inlining function would have more than one basic
+ // block. A single basic block is often intended for inlining. Balloon the
+ // threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + Threshold += SingleBBBonus; - Function *F = dyn_cast(C); - if (!F) return 0; + // Unless we are always-inlining, perform some tweaks to the cost and + // threshold based on the direct callsite information. + if (!AlwaysInline) { + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; - int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); - return (Bonus > 0) ? 0 : Bonus; -} + // Subtract off one instruction per call argument as those will be free after + // inlining. + Cost -= CS.arg_size() * InlineConstants::InstrCost; -// CountBonusForConstant - Figure out an approximation for how much per-call -// performance boost we can expect if the specified value is constant. -int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { - unsigned Bonus = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (CallInst *CI = dyn_cast(U)) { - // Turning an indirect call into a direct call is a BIG win - if (CI->getCalledValue() == V) - Bonus += ConstantFunctionBonus(CallSite(CI), C); - } else if (InvokeInst *II = dyn_cast(U)) { - // Turning an indirect call into a direct call is a BIG win - if (II->getCalledValue() == V) - Bonus += ConstantFunctionBonus(CallSite(II), C); + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. + if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) + Cost += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this unless there is literally zero cost. + if (InvokeInst *II = dyn_cast(CS.getInstruction())) { + if (isa(II->getNormalDest()->begin())) + Threshold = 1; + } else if (isa(++BasicBlock::iterator(CS.getInstruction()))) + Threshold = 1; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; + + // Check if we're done. This can happen due to bonuses and penalties. + if (Cost > Threshold) + return false; + } + + if (F.empty()) + return true; + + // Track whether we've seen a return instruction. The first return + // instruction is free, as at least one will usually disappear in inlining. + bool HasReturn = false; + + // Populate our simplified values by mapping from function arguments to call + // arguments with known important simplifications. + CallSite::arg_iterator CAI = CS.arg_begin(); + for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); + FAI != FAE; ++FAI, ++CAI) { + assert(CAI != CS.arg_end()); + if (Constant *C = dyn_cast(CAI)) + SimplifiedValues[FAI] = C; + + Value *PtrArg = *CAI; + if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { + ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + + // We can SROA any pointer arguments derived from alloca instructions. 
+ if (isa(PtrArg)) { + SROAArgValues[FAI] = PtrArg; + SROAArgCosts[PtrArg] = 0; + } } - // FIXME: Eliminating conditional branches and switches should - // also yield a per-call performance boost. - else { - // Figure out the bonuses that wll accrue due to simple constant - // propagation. - Instruction &Inst = cast(*U); + } + NumConstantArgs = SimplifiedValues.size(); + NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); + NumAllocaArgs = SROAArgValues.size(); - // We can't constant propagate instructions which have effects or - // read memory. - // - // FIXME: It would be nice to capture the fact that a load from a - // pointer-to-constant-global is actually a *really* good thing to zap. - // Unfortunately, we don't know the pointer that may get propagated here, - // so we can't make this decision. - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || - isa(Inst)) - continue; + // The worklist of live basic blocks in the callee *after* inlining. We avoid + // adding basic blocks of the callee which can be proven to be dead for this + // particular call site in order to get more accurate cost estimates. This + // requires a somewhat heavyweight iteration pattern: we need to walk the + // basic blocks in a breadth-first order as we insert live successors. To + // accomplish this, prioritizing for small iterations because we exit after + // crossing our threshold, we use a small-size optimized SetVector. + typedef SetVector, + SmallPtrSet > BBSetVector; + BBSetVector BBWorklist; + BBWorklist.insert(&F.getEntryBlock()); + // Note that we *must not* cache the size, this loop grows the worklist. + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { + // Bail out the moment we cross the threshold. This means we'll under-count + // the cost, but only when undercounting doesn't matter. + if (!AlwaysInline && Cost > (Threshold + VectorBonus)) + break; - bool AllOperandsConstant = true; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) - if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != V) { - AllOperandsConstant = false; - break; + BasicBlock *BB = BBWorklist[Idx]; + if (BB->empty()) + continue; + + // Handle the terminator cost here where we can track returns and other + // function-wide constructs. + TerminatorInst *TI = BB->getTerminator(); + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. + if (isa(TI)) + return false; + + if (!HasReturn && isa(TI)) + HasReturn = true; + else + Cost += InlineConstants::InstrCost; + + // Analyze the cost of this block. If we blow through the threshold, this + // returns false, and we can bail on out. 
+ if (!analyzeBlock(BB)) {
+ if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+ break;
+ }
+
+ // Add in the live successors by first checking whether we have terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast(TI)) {
+ if (BI->isConditional()) {
+ Value *Cond = BI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ continue;
 }
+ }
+ } else if (SwitchInst *SI = dyn_cast(TI)) {
+ Value *Cond = SI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ continue;
+ }
+ }
- if (AllOperandsConstant)
- Bonus += CountBonusForConstant(&Inst);
+ // If we're unable to select a particular successor, just count all of
+ // them.
+ for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+ BBWorklist.insert(TI->getSuccessor(TIdx));
+
+ // If we had any successors at this point, then post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
 }
 }
- return Bonus;
+ Threshold += VectorBonus;
+
+ return AlwaysInline || Cost < Threshold;
 }
-int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- // InlineCost - This value measures how good of an inline candidate this call
- // site is to inline. A lower inline cost make is more likely for the call to
- // be inlined. This value may go negative.
- //
- int InlineCost = 0;
-
- // Compute any size reductions we can expect due to arguments being passed into
- // the function.
- //
- unsigned ArgNo = 0;
- CallSite::arg_iterator I = CS.arg_begin();
- for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
- FI != FE; ++I, ++FI, ++ArgNo) {
-
- // If an alloca is passed in, inlining this function is likely to allow
- // significant future optimization possibilities (like scalar promotion, and
- // scalarization), so encourage the inlining of the function.
- //
- if (isa(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
- // If this is a constant being passed into the function, use the argument
- // weights calculated for the callee to determine how much will be folded
- // away with this information.
- else if (isa(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
- }
-
- // Each argument passed in has a cost at both the caller and the callee
- // sides. Measurements show that each argument costs about the same as an
- // instruction.
- InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
-
- // Now that we have considered all of the factors that make the call site more
- // likely to be inlined, look at factors that make us not want to inline it.
-
- // Calls usually take a long time, so they make the inlining gain smaller.
- InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; - - // Look at the size of the callee. Each instruction counts as 5. - InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; - - return InlineCost; +/// \brief Dump stats about this call's analysis. +void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" + DEBUG_PRINT_STAT(NumConstantArgs); + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); + DEBUG_PRINT_STAT(NumAllocaArgs); + DEBUG_PRINT_STAT(NumConstantPtrCmps); + DEBUG_PRINT_STAT(NumConstantPtrDiffs); + DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); +#undef DEBUG_PRINT_STAT } -int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); - - bool isDirectCall = CS.getCalledFunction() == Callee; - Instruction *TheCall = CS.getInstruction(); - int Bonus = 0; - - // If there is only one call of the function, and it has internal linkage, - // make it almost guaranteed to be inlined. - // - if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) - Bonus += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this. - if (InvokeInst *II = dyn_cast(TheCall)) { - if (isa(II->getNormalDest()->begin())) - Bonus += InlineConstants::NoreturnPenalty; - } else if (isa(++BasicBlock::iterator(TheCall))) - Bonus += InlineConstants::NoreturnPenalty; - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (Callee->getCallingConv() == CallingConv::Cold) - Bonus += InlineConstants::ColdccPenalty; - - // Add to the inline quality for properties that make the call valuable to - // inline. This includes factors that indicate that the result of inlining - // the function will be optimizable. Currently this just looks at arguments - // passed into the function. - // - CallSite::arg_iterator I = CS.arg_begin(); - for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); - FI != FE; ++I, ++FI) - // Compute any constant bonus due to inlining we want to give here. - if (isa(I)) - Bonus += CountBonusForConstant(FI, cast(I)); - - return Bonus; +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold); } -// getInlineCost - The heuristic used to determine if we should inline the -// function call or not. -// -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - SmallPtrSet &NeverInline) { - return getInlineCost(CS, CS.getCalledFunction(), NeverInline); -} - -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - Function *Callee, - SmallPtrSet &NeverInline) { - Instruction *TheCall = CS.getInstruction(); - Function *Caller = TheCall->getParent()->getParent(); - +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, + int Threshold) { // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. 
- if (Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || - CS.isNoInline()) + if (!Callee || Callee->mayBeOverridden() || + Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) return llvm::InlineCost::getNever(); - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); + CallAnalyzer CA(TD, *Callee, Threshold); + bool ShouldInline = CA.analyzeCall(CS); - // If we should never inline this, return a huge cost. - if (CalleeFI->NeverInline()) + DEBUG(CA.dump()); + + // Check if there was a reason to force inlining or no inlining. + if (!ShouldInline && CA.getCost() < CA.getThreshold()) return InlineCost::getNever(); - - // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we - // could move this up and avoid computing the FunctionInfo for - // things we are going to just return always inline for. This - // requires handling setjmp somewhere else, however. - if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) + if (ShouldInline && CA.getCost() >= CA.getThreshold()) return InlineCost::getAlways(); - if (CalleeFI->Metrics.usesDynamicAlloca) { - // Get information about the caller. - FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; - - // If we haven't calculated this information yet, do so now. - if (CallerFI.Metrics.NumBlocks == 0) { - CallerFI.analyzeFunction(Caller, TD); - - // Recompute the CalleeFI pointer, getting Caller could have invalidated - // it. - CalleeFI = &CachedFunctionInfo[Callee]; - } - - // Don't inline a callee with dynamic alloca into a caller without them. - // Functions containing dynamic alloca's are inefficient in various ways; - // don't create more inefficiency. - if (!CallerFI.Metrics.usesDynamicAlloca) - return InlineCost::getNever(); - } - - // InlineCost - This value measures how good of an inline candidate this call - // site is to inline. A lower inline cost make is more likely for the call to - // be inlined. This value may go negative due to the fact that bonuses - // are negative numbers. - // - int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee); - return llvm::InlineCost::get(InlineCost); -} - -// getSpecializationCost - The heuristic used to determine the code-size -// impact of creating a specialized version of Callee with argument -// SpecializedArgNo replaced by a constant. -InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, - SmallVectorImpl &SpecializedArgNos) -{ - // Don't specialize functions which can be redefined at link-time to mean - // something else. - if (Callee->mayBeOverridden()) - return llvm::InlineCost::getNever(); - - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); - - int Cost = 0; - - // Look at the original size of the callee. Each instruction counts as 5. - Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; - - // Offset that with the amount of code that can be constant-folded - // away with the given arguments replaced by constants. 
- for (SmallVectorImpl::iterator an = SpecializedArgNos.begin(), - ae = SpecializedArgNos.end(); an != ae; ++an) - Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight; - - return llvm::InlineCost::get(Cost); -} - -// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a -// higher threshold to determine if the function call should be inlined. -float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { - Function *Callee = CS.getCalledFunction(); - - // Get information about the callee. - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI.Metrics.NumBlocks == 0) - CalleeFI.analyzeFunction(Callee, TD); - - float Factor = 1.0f; - // Single BB functions are often written to be inlined. - if (CalleeFI.Metrics.NumBlocks == 1) - Factor += 0.5f; - - // Be more aggressive if the function contains a good chunk (if it mades up - // at least 10% of the instructions) of vector instructions. - if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) - Factor += 2.0f; - else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) - Factor += 1.5f; - return Factor; -} - -/// growCachedCostInfo - update the cached cost info for Caller after Callee has -/// been inlined. -void -InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { - CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; - - // For small functions we prefer to recalculate the cost for better accuracy. - if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { - resetCachedCostInfo(Caller); - return; - } - - // For large functions, we can save a lot of computation time by skipping - // recalculations. - if (CallerMetrics.NumCalls > 0) - --CallerMetrics.NumCalls; - - if (Callee == 0) return; - - CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; - - // If we don't have metrics for the callee, don't recalculate them just to - // update an approximation in the caller. Instead, just recalculate the - // caller info from scratch. - if (CalleeMetrics.NumBlocks == 0) { - resetCachedCostInfo(Caller); - return; - } - - // Since CalleeMetrics were already calculated, we know that the CallerMetrics - // reference isn't invalidated: both were in the DenseMap. - CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; - - // FIXME: If any of these three are true for the callee, the callee was - // not inlined into the caller, so I think they're redundant here. - CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; - CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; - CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; - - CallerMetrics.NumInsts += CalleeMetrics.NumInsts; - CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; - CallerMetrics.NumCalls += CalleeMetrics.NumCalls; - CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; - CallerMetrics.NumRets += CalleeMetrics.NumRets; - - // analyzeBasicBlock counts each function argument as an inst. - if (CallerMetrics.NumInsts >= Callee->arg_size()) - CallerMetrics.NumInsts -= Callee->arg_size(); - else - CallerMetrics.NumInsts = 0; - - // We are not updating the argument weights. We have already determined that - // Caller is a fairly large function, so we accept the loss of precision. 
-} - -/// clear - empty the cache of inline costs -void InlineCostAnalyzer::clear() { - CachedFunctionInfo.clear(); + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 131cc97d2379..16e7a726595d 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -18,13 +18,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" +#include "llvm/GlobalAlias.h" #include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetData.h" @@ -37,23 +41,28 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc, "Number of reassociations"); -static Value *SimplifyAndInst(Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyOrInst(Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); -static Value *SimplifyXorInst(Value *, Value *, const TargetData *, - const DominatorTree *, unsigned); +struct Query { + const TargetData *TD; + const TargetLibraryInfo *TLI; + const DominatorTree *DT; + + Query(const TargetData *td, const TargetLibraryInfo *tli, + const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}; +}; + +static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, + unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, + unsigned); +static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned); /// getFalse - For a boolean type, or a vector of boolean type, return false, or /// a vector with every element false, as appropriate for the type. static Constant *getFalse(Type *Ty) { - assert((Ty->isIntegerTy(1) || - (Ty->isVectorTy() && - cast(Ty)->getElementType()->isIntegerTy(1))) && + assert(Ty->getScalarType()->isIntegerTy(1) && "Expected i1 type or a vector of i1!"); return Constant::getNullValue(Ty); } @@ -61,13 +70,25 @@ static Constant *getFalse(Type *Ty) { /// getTrue - For a boolean type, or a vector of boolean type, return true, or /// a vector with every element true, as appropriate for the type. static Constant *getTrue(Type *Ty) { - assert((Ty->isIntegerTy(1) || - (Ty->isVectorTy() && - cast(Ty)->getElementType()->isIntegerTy(1))) && + assert(Ty->getScalarType()->isIntegerTy(1) && "Expected i1 type or a vector of i1!"); return Constant::getAllOnesValue(Ty); } +/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? 
+static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { + CmpInst *Cmp = dyn_cast(V); + if (!Cmp) + return false; + CmpInst::Predicate CPred = Cmp->getPredicate(); + Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1); + if (CPred == Pred && CLHS == LHS && CRHS == RHS) + return true; + return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS && + CRHS == LHS; +} + /// ValueDominatesPHI - Does the given value dominate the specified phi node? static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast(V); @@ -75,9 +96,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { // Arguments and constants dominate all instructions. return true; + // If we are processing instructions (and/or basic blocks) that have not been + // fully added to a function, the parent nodes may still be null. Simply + // return the conservative answer in these cases. + if (!I->getParent() || !P->getParent() || !I->getParent()->getParent()) + return false; + // If we have a DominatorTree then do a precise test. - if (DT) + if (DT) { + if (!DT->isReachableFromEntry(P->getParent())) + return true; + if (!DT->isReachableFromEntry(I->getParent())) + return false; return DT->dominates(I, P); + } // Otherwise, if the instruction is in the entry block, and is not an invoke, // then it obviously dominates all phi nodes. @@ -94,8 +126,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExpand, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { + unsigned OpcToExpand, const Query &Q, + unsigned MaxRecurse) { Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -107,8 +139,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, // It does! Try turning it into "(A op C) op' (B op C)". Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) - if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) @@ -117,8 +149,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return LHS; } // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, - MaxRecurse)) { + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } @@ -131,8 +162,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, // It does! Try turning it into "(A op B) op' (A op C)". Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); // Do "A op B" and "A op C" both simplify? 
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) - if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) { + if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) @@ -141,8 +172,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return RHS; } // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, - MaxRecurse)) { + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } @@ -157,8 +187,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". /// Returns the simplified value, or null if no simplification was performed. static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExtract, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { + unsigned OpcToExtract, const Query &Q, + unsigned MaxRecurse) { Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -182,7 +212,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *DD = A == C ? D : C; // Form "A op' (B op DD)" if it simplifies completely. // Does "B op DD" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) { // It does! Return "A op' V" if it simplifies or is already available. // If V equals B then "A op' V" is just the LHS. If V equals DD then // "A op' V" is just the RHS. @@ -191,7 +221,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, return V == B ? LHS : RHS; } // Otherwise return "A op' V" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) { ++NumFactor; return W; } @@ -205,7 +235,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *CC = B == D ? C : D; // Form "(A op CC) op' B" if it simplifies completely.. // Does "A op CC" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) { // It does! Return "V op' B" if it simplifies or is already available. // If V equals A then "V op' B" is just the LHS. If V equals CC then // "V op' B" is just the RHS. @@ -214,7 +244,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, return V == A ? LHS : RHS; } // Otherwise return "V op' B" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) { ++NumFactor; return W; } @@ -227,9 +257,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// SimplifyAssociativeBinOp - Generic simplifications for associative binary /// operations. Returns the simpler value, or null if none was found. 
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, - const TargetData *TD, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -247,12 +275,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = RHS; // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // It does! Return "A op V" if it simplifies or is already available. // If V equals B then "A op V" is just the LHS. if (V == B) return LHS; // Otherwise return "A op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -266,12 +294,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { // It does! Return "V op C" if it simplifies or is already available. // If V equals B then "V op C" is just the RHS. if (V == B) return RHS; // Otherwise return "V op C" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -289,12 +317,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = RHS; // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "V op B" if it simplifies or is already available. // If V equals A then "V op B" is just the LHS. if (V == A) return LHS; // Otherwise return "V op B" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -308,12 +336,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "B op V" if it simplifies or is already available. // If V equals C then "B op V" is just the RHS. if (V == C) return RHS; // Otherwise return "B op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -328,9 +356,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, /// evaluating it on both branches of the select results in the same value. /// Returns the common value if so, otherwise returns null. static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. 
if (!MaxRecurse--) return 0; @@ -347,11 +373,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, Value *TV; Value *FV; if (SI == LHS) { - TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse); - FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse); + TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); } else { - TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse); - FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse); + TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); } // If they simplified to the same value, then return the common value. @@ -402,8 +428,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, /// result in the same value. Returns the common value if so, otherwise returns /// null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const TargetData *TD, - const DominatorTree *DT, + Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -416,40 +441,67 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, } assert(isa(LHS) && "Not comparing with a select instruction!"); SelectInst *SI = cast(LHS); + Value *Cond = SI->getCondition(); + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! Does "cmp FV, RHS" simplify? - if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! If they simplified to the same value, then use it as the - // result of the original comparison. - if (TCmp == FCmp) - return TCmp; - Value *Cond = SI->getCondition(); - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". - if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - TD, DT, MaxRecurse)) - return V; - } + Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); + if (TCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'true'. + TCmp = getTrue(Cond->getType()); + } else if (!TCmp) { + // It didn't simplify. However if "cmp TV, RHS" is equal to the select + // condition then we can replace it with 'true'. Otherwise give up. + if (!isSameCompare(Cond, Pred, TV, RHS)) + return 0; + TCmp = getTrue(Cond->getType()); } + // Does "cmp FV, RHS" simplify? 
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); + if (FCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'false'. + FCmp = getFalse(Cond->getType()); + } else if (!FCmp) { + // It didn't simplify. However if "cmp FV, RHS" is equal to the select + // condition then we can replace it with 'false'. Otherwise give up. + if (!isSameCompare(Cond, Pred, FV, RHS)) + return 0; + FCmp = getFalse(Cond->getType()); + } + + // If both sides simplified to the same value, then use it as the result of + // the original comparison. + if (TCmp == FCmp) + return TCmp; + + // The remaining cases only make sense if the select condition has the same + // type as the result of the comparison, so bail out if this is not so. + if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) + return 0; + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + Q, MaxRecurse)) + return V; + return 0; } @@ -458,8 +510,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// it on the incoming phi values yields the same result for every value. If so /// returns the common value, otherwise returns null. static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return 0; @@ -468,13 +519,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, if (isa(LHS)) { PI = cast(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, DT)) + if (!ValueDominatesPHI(RHS, PI, Q.DT)) return 0; } else { assert(isa(RHS) && "No PHI instruction operand!"); PI = cast(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(LHS, PI, DT)) + if (!ValueDominatesPHI(LHS, PI, Q.DT)) return 0; } @@ -485,8 +536,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; Value *V = PI == LHS ? - SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) : - SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse); + SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : + SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) @@ -502,8 +553,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, /// incoming phi values yields the same result every time. 
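The rewritten ThreadCmpOverSelect combines the two simplified arms TCmp and FCmp exactly as a boolean select would. Below is a standalone boolean model of those folds (plain C++, not the LLVM code itself); the loop exhaustively checks it against the naive "Cond ? TCmp : FCmp" result:

    #include <cassert>

    // Once both arms of "cmp select(Cond, TV, FV), RHS" simplify to booleans,
    // the whole compare is Cond ? TCmp : FCmp; the three folds above cover it.
    static bool threadCmpOverSelect(bool Cond, bool TCmp, bool FCmp) {
        if (!FCmp) return Cond && TCmp;   // false arm false   -> Cond && TCmp
        if (TCmp)  return Cond || FCmp;   // true arm true     -> Cond || FCmp
        return !Cond;                     // FCmp true, TCmp false -> !Cond
    }

    int main() {
        for (int C = 0; C < 2; ++C)
            for (int T = 0; T < 2; ++T)
                for (int F = 0; F < 2; ++F)
                    assert(threadCmpOverSelect(C, T, F) == (C ? bool(T) : bool(F)));
        return 0;
    }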
If so returns the /// common result, otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return 0; @@ -517,7 +567,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, PHINode *PI = cast(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, DT)) + if (!ValueDominatesPHI(RHS, PI, Q.DT)) return 0; // Evaluate the BinOp on the incoming phi values. @@ -526,7 +576,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse); + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) @@ -540,13 +590,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, /// SimplifyAddInst - Given operands for an Add, see if we can /// fold the result. If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; - return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), - Ops, TD); + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops, + Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -576,17 +625,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// i1 add -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Threading Add over selects and phi nodes is pointless, so don't bother. @@ -602,20 +651,116 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); +} + +/// \brief Accumulate the constant integer offset a GEP represents. +/// +/// Given a getelementptr instruction/constantexpr, accumulate the constant +/// offset from the base pointer into the provided APInt 'Offset'. Returns true +/// if the GEP has all-constant indices. 
Returns false if any non-constant +/// index is encountered leaving the 'Offset' in an undefined state. The +/// 'Offset' APInt must be the bitwidth of the target's pointer size. +static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP, + APInt &Offset) { + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = dyn_cast(*I); + if (!OpC) return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD.getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +static Constant *stripAndComputeConstantOffsets(const TargetData &TD, + Value *&V) { + if (!V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset)) + break; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD.getIntPtrType(V->getContext()); + return ConstantInt::get(IntPtrTy, Offset); +} + +/// \brief Compute the constant difference between two pointer values. +/// If the difference is not a constant, returns zero. +static Constant *computePointerDifference(const TargetData &TD, + Value *LHS, Value *RHS) { + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + if (!LHSOffset) + return 0; + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + if (!RHSOffset) + return 0; + + // If LHS and RHS are not related via constant offsets to the same base + // value, there is nothing we can do here. + if (LHS != RHS) + return 0; + + // Otherwise, the difference of LHS - RHS can be computed as: + // LHS - RHS + // = (LHSOffset + Base) - (RHSOffset + Base) + // = LHSOffset - RHSOffset + return ConstantExpr::getSub(LHSOffset, RHSOffset); } /// SimplifySubInst - Given operands for a Sub, see if we can /// fold the result. If not, this returns null. 
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast(Op0)) if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // X - undef -> undef @@ -643,19 +788,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *Y = 0, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) // It does! Now see if "X + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "Y + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -667,19 +810,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, X = Op0; if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) // See if "V === X - Y" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) // It does! Now see if "V - Z" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "V - Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -691,23 +832,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Z = Op0; if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) // See if "V === Z - X" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) // It does! Now see if "V + Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } + // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies. + if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) && + match(Op1, m_Trunc(m_Value(Y)))) + if (X->getType() == Y->getType()) + // See if "V === X - Y" simplifies. 
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + // It does! Now see if "trunc V" simplifies. + if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1)) + // It does, return the simplified "trunc V". + return W; + + // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). + if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + match(Op1, m_PtrToInt(m_Value(Y)))) + if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); + // Mul distributes over Sub. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // i1 sub -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Threading Sub over selects and phi nodes is pointless, so don't bother. @@ -723,19 +880,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -755,40 +914,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, return Op0; // (X / Y) * Y -> X if the division is exact. - Value *X = 0, *Y = 0; - if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y - (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y) - BinaryOperator *Div = cast(Y == Op1 ? Op0 : Op1); - if (Div->isExact()) - return X; - } + Value *X = 0; + if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y + match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) + return X; // i1 mul -> and. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. 
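A quick standalone check of the new trunc rewrite in SimplifySubInst: unsigned arithmetic is modular, so truncating before or after the subtraction yields the same bits, which is why trunc(X) - trunc(Y) may become trunc(X - Y) when everything simplifies (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
        uint64_t X = 0x123456789abcdef0ULL, Y = 0x0fedcba987654321ULL;
        // Truncation commutes with subtraction modulo 2^32.
        assert(uint32_t(X) - uint32_t(Y) == uint32_t(X - Y));
        return 0;
    }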
if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; @@ -796,19 +952,19 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can /// fold the result. If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast(Op0)) { if (Constant *C1 = dyn_cast(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -842,7 +998,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, Value *X = 0, *Y = 0; if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 - BinaryOperator *Mul = cast(Op0); + OverflowingBinaryOperator *Mul = cast(Op0); // If the Mul knows it does not overflow, then we are good to go. if ((isSigned && Mul->hasNoSignedWrap()) || (!isSigned && Mul->hasNoUnsignedWrap())) @@ -861,13 +1017,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -875,36 +1031,38 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// SimplifySDivInst - Given operands for an SDiv, see if we can /// fold the result. If not, this returns null. 
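SimplifyDiv only rewrites (X * Y) / Y to X when the multiply carries a no-wrap flag, which is why the cast above is tightened to OverflowingBinaryOperator. An 8-bit counterexample showing why the guard is needed (standalone sketch, not part of the diff):

    #include <cassert>
    #include <cstdint>

    int main() {
        uint8_t X = 16, Y = 16;
        uint8_t Wrapped = uint8_t(X * Y);        // 256 wraps to 0
        assert(Wrapped / Y != X);                // the naive fold would be wrong
        uint8_t X2 = 5, Y2 = 7;                  // no wrap: the fold is sound
        assert(uint8_t(X2 * Y2) / Y2 == X2);
        return 0;
    }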
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyUDivInst - Given operands for a UDiv, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } -static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, - const DominatorTree *, unsigned) { +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned) { // undef / X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -917,19 +1075,19 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyRem - Given operands for an SRem or URem, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast(Op0)) { if (Constant *C1 = dyn_cast(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -964,13 +1122,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. 
if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -978,36 +1136,38 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// SimplifySRemInst - Given operands for an SRem, see if we can /// fold the result. If not, this returns null. -static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyURemInst - Given operands for a URem, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse)) +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } -static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, - const DominatorTree *, unsigned) { +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, + unsigned) { // undef % X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -1020,19 +1180,19 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast(Op0)) { if (Constant *C1 = dyn_cast(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -1057,13 +1217,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. 
if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1072,9 +1232,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, /// SimplifyShlInst - Given operands for an Shl, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; // undef << X -> 0 @@ -1083,23 +1242,23 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // (X >> A) << A -> X Value *X; - if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) && - cast(Op0)->isExact()) + if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; return 0; } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyLShrInst - Given operands for an LShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) return V; // undef >>l X -> 0 @@ -1116,16 +1275,18 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyAShrInst - Given operands for an AShr, see if we can /// fold the result. If not, this returns null. 
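The Shl fold above, now written with the m_Exact matcher, still requires the inner right shift to be exact: an exact shift discards only zero bits, so shifting back left recovers X. A small standalone check of the round-trip:

    #include <cassert>
    #include <cstdint>

    int main() {
        uint32_t A = 1;
        uint32_t Exact = 4, Inexact = 5;
        assert(((Exact >> A) << A) == Exact);      // no set bits shifted out
        assert(((Inexact >> A) << A) != Inexact);  // low bit lost: fold invalid
        return 0;
    }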
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) return V; // all ones >>a X -> all ones @@ -1146,19 +1307,22 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1197,37 +1361,46 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, (A == Op0 || B == Op0)) return Op0; + // A & (-A) = A if A is a power of two or zero. + if (match(Op0, m_Neg(m_Specific(Op1))) || + match(Op1, m_Neg(m_Specific(Op0)))) { + if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true)) + return Op0; + if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true)) + return Op1; + } + // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. 
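The new "A & (-A) = A" fold in SimplifyAndInst rests on a two's-complement fact: negating a value that is zero or a single power of two preserves that one set bit. A standalone check of the identity, and of why the power-of-two precondition matters:

    #include <cassert>
    #include <cstdint>

    int main() {
        uint32_t PowersOrZero[] = {0u, 1u, 2u, 4u, 64u, 1u << 31};
        for (uint32_t A : PowersOrZero)
            assert((A & (0u - A)) == A);    // the fold applied above
        assert((12u & (0u - 12u)) != 12u);  // 12 is not a power of two
        return 0;
    }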
if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; @@ -1235,19 +1408,20 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1297,51 +1471,51 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. - if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q, + MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(Op0) || isa(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyXorInst - Given operands for a Xor, see if we can /// fold the result. If not, this returns null. 
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, TD); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1366,13 +1540,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Threading Xor over selects and phi nodes is pointless, so don't bother. @@ -1388,8 +1562,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } static Type *GetCompareTy(Value *Op) { @@ -1416,17 +1591,56 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } +static Constant *computePointerICmp(const TargetData &TD, + CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + // We can only fold certain predicates on pointer comparisons. + switch (Pred) { + default: + return 0; + + // Equality comaprisons are easy to fold. + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_NE: + break; + + // We can only handle unsigned relational comparisons because 'inbounds' on + // a GEP only protects against unsigned wrapping. + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + // However, we have to switch them to their signed variants to handle + // negative indices from the base pointer. + Pred = ICmpInst::getSignedPredicate(Pred); + break; + } + + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + if (!LHSOffset) + return 0; + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + if (!RHSOffset) + return 0; + + // If LHS and RHS are not related via constant offsets to the same base + // value, there is nothing we can do here. + if (LHS != RHS) + return 0; + + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); +} + /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); if (Constant *CLHS = dyn_cast(LHS)) { if (Constant *CRHS = dyn_cast(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); // If we have a constant, make sure it is on the RHS. 
std::swap(LHS, RHS); @@ -1443,8 +1657,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); // Special case logic when the operands have i1 type. - if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() && - cast(OpTy)->getElementType()->isIntegerTy(1))) { + if (OpTy->getScalarType()->isIntegerTy(1)) { switch (Pred) { default: break; case ICmpInst::ICMP_EQ: @@ -1480,63 +1693,101 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // icmp , - Different stack variables have - // different addresses, and what's more the address of a stack variable is - // never null or equal to the address of a global. Note that generalizing - // to the case where LHS is a global variable address or null is pointless, - // since if both LHS and RHS are constants then we already constant folded - // the compare, and if only one of them is then we moved it to RHS already. - if (isa(LHS) && (isa(RHS) || isa(RHS) || - isa(RHS))) - // We already know that LHS != RHS. - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + // icmp , - Different identified objects have + // different addresses (unless null), and what's more the address of an + // identified local is never equal to another argument (again, barring null). + // Note that generalizing to the case where LHS is a global variable address + // or null is pointless, since if both LHS and RHS are constants then we + // already constant folded the compare, and if only one of them is then we + // moved it to RHS already. + Value *LHSPtr = LHS->stripPointerCasts(); + Value *RHSPtr = RHS->stripPointerCasts(); + if (LHSPtr == RHSPtr) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + // Be more aggressive about stripping pointer adjustments when checking a + // comparison of an alloca address to another object. We can rip off all + // inbounds GEP operations, even if they are variable. + LHSPtr = LHSPtr->stripInBoundsOffsets(); + if (llvm::isIdentifiedObject(LHSPtr)) { + RHSPtr = RHSPtr->stripInBoundsOffsets(); + if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) { + // If both sides are different identified objects, they aren't equal + // unless they're null. + if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + + // A local identified object (alloca or noalias call) can't equal any + // incoming argument, unless they're both null. + if (isa(LHSPtr) && isa(RHSPtr) && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + } + + // Assume that the constant null is on the right. + if (llvm::isKnownNonNull(LHSPtr) && isa(RHSPtr)) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } + } else if (isa(LHSPtr)) { + RHSPtr = RHSPtr->stripInBoundsOffsets(); + // An alloca can't be equal to an argument. + if (isa(RHSPtr)) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } + } // If we are comparing with zero then try hard since this is a common case. 
if (match(RHS, m_Zero())) { bool LHSKnownNonNegative, LHSKnownNegative; switch (Pred) { - default: - assert(false && "Unknown ICmp predicate!"); + default: llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: return getFalse(ITy); case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, TD)) + if (isKnownNonZero(LHS, Q.TD)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, TD)) + if (isKnownNonZero(LHS, Q.TD)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) return getTrue(ITy); break; } @@ -1564,6 +1815,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // 'srem x, CI2' produces (-|CI2|, |CI2|). Upper = CI2->getValue().abs(); Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) { + // 'udiv CI2, x' produces [0, CI2]. + Upper = CI2->getValue() + 1; } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. APInt NegOne = APInt::getAllOnesValue(Width); @@ -1616,19 +1870,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. - if (MaxRecurse && TD && isa(LI) && - TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + if (MaxRecurse && Q.TD && isa(LI) && + Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, ConstantExpr::getIntToPtr(RHSC, SrcTy), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } else if (PtrToIntInst *RI = dyn_cast(RHS)) { if (RI->getOperand(0)->getType() == SrcTy) // Compare without the cast. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } } @@ -1640,7 +1894,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that signed predicates become unsigned. 
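The added range fact, that 'udiv CI2, x' lies in [0, CI2], is just the observation that an unsigned quotient never exceeds its dividend (the same fact drives the later "x udiv y <=u x" fold). An exhaustive 8-bit check, independent of LLVM:

    #include <cassert>

    int main() {
        for (unsigned C = 0; C < 256; ++C)
            for (unsigned X = 1; X < 256; ++X)
                assert(C / X <= C);         // quotient bounded by the dividend
        return 0;
    }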
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, RI->getOperand(0), TD, DT, + SrcOp, RI->getOperand(0), Q, MaxRecurse-1)) return V; } @@ -1656,15 +1910,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // also a case of comparing two zero-extended values. if (RExt == CI && MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, Trunc, TD, DT, MaxRecurse-1)) + SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit // there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { - default: - assert(false && "Unknown ICmp predicate!"); + default: llvm_unreachable("Unknown ICmp predicate!"); // LHS getOperand(0)->getType()) // Compare X and Y. Note that the predicate does not change. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended @@ -1715,16 +1968,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the re-extended constant didn't change then this is effectively // also a case of comparing two sign-extended values. if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT, - MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are all equal, while RHS has varying // bits there. Use this to work out the result of the comparison. if (RExt != CI) { switch (Pred) { - default: - assert(false && "Unknown ICmp predicate!"); + default: llvm_unreachable("Unknown ICmp predicate!"); case ICmpInst::ICMP_EQ: return ConstantInt::getFalse(CI->getContext()); case ICmpInst::ICMP_NE: @@ -1751,7 +2002,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, Constant::getNullValue(SrcTy), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; break; case ICmpInst::ICMP_ULT: @@ -1760,7 +2011,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, Constant::getNullValue(SrcTy), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; break; } @@ -1794,14 +2045,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if ((A == RHS || B == RHS) && NoLHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, Constant::getNullValue(RHS->getType()), - TD, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), - C == LHS ? D : C, TD, DT, MaxRecurse-1)) + C == LHS ? D : C, Q, MaxRecurse-1)) return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. @@ -1810,7 +2061,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Determine Y and Z in the form icmp (X+Y), (X+Z). Value *Y = (A == C || A == D) ? B : A; Value *Z = (C == A || C == B) ? 
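The "icmp (X + Y), X -> icmp Y, 0" rewrites above are gated on the no-wrap checks for non-equality predicates because a wrapping add breaks the equivalence. An 8-bit counterexample plus a non-wrapping case (standalone sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
        uint8_t X = 200, Y = 100;
        uint8_t Sum = uint8_t(X + Y);                // wraps to 44
        assert((Sum > X) != (Y > 0));                // rewrite would flip the result
        uint8_t X2 = 10, Y2 = 100;                   // no wrap: rewrite is sound
        assert((uint8_t(X2 + Y2) > X2) == (Y2 > 0));
        return 0;
    }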
D : C; - if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1)) return V; } } @@ -1822,7 +2073,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1832,7 +2083,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1849,7 +2100,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1859,7 +2110,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1870,6 +2121,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // x udiv y <=u x. + if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { + // icmp pred (X /u Y), X + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && LBO->getOperand(1) == RBO->getOperand(1)) { switch (LBO->getOpcode()) { @@ -1884,7 +2144,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, DT, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse-1)) return V; break; case Instruction::Shl: { @@ -1895,7 +2155,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!NSW && ICmpInst::isSigned(Pred)) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, DT, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse-1)) return V; break; } @@ -1949,7 +2209,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -1963,7 +2223,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) return V; break; } @@ -2019,7 +2279,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. 
if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -2033,7 +2293,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) return V; break; } @@ -2090,37 +2350,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); } + // Simplify comparisons of related pointers using a powerful, recursive + // GEP-walk when we have target data available.. + if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) + return C; + + if (GetElementPtrInst *GLHS = dyn_cast(LHS)) { + if (GEPOperator *GRHS = dyn_cast(RHS)) { + if (GLHS->getPointerOperand() == GRHS->getPointerOperand() && + GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() && + (ICmpInst::isEquality(Pred) || + (GLHS->isInBounds() && GRHS->isInBounds() && + Pred == ICmpInst::getSignedPredicate(Pred)))) { + // The bases are equal and the indices are constant. Build a constant + // expression GEP with the same indices and a null base pointer to see + // what constant folding can make out of it. + Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); + SmallVector IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); + Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + + SmallVector IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); + Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); + } + } + } + // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. 
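The new constant-index GEP comparison rebuilds both GEPs over a null base so constant folding can decide the predicate from the offsets alone. A loose analogy in plain C++, with array indexing standing in for the GEPs (not the commit's code):

    #include <cassert>
    #include <cstddef>

    int main() {
        int Base[16] = {};
        // With a shared base and constant indices, only the byte offsets
        // 3*sizeof(int) and 7*sizeof(int) decide the comparison.
        std::ptrdiff_t L = 3 * sizeof(int), R = 7 * sizeof(int);
        assert((&Base[3] < &Base[7]) == (L < R));
        return 0;
    }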
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast(LHS)) { if (Constant *CRHS = dyn_cast(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -2188,27 +2477,31 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. -Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, - const TargetData *TD, const DominatorTree *) { +static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, + Value *FalseVal, const Query &Q, + unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y if (ConstantInt *CB = dyn_cast(CondVal)) @@ -2231,12 +2524,22 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, return 0; } +Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT), + RecursionLimit); +} + /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyGEPInst(ArrayRef Ops, - const TargetData *TD, const DominatorTree *) { +static Value *SimplifyGEPInst(ArrayRef Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. - PointerType *PtrTy = cast(Ops[0]->getType()); + PointerType *PtrTy = dyn_cast(Ops[0]->getType()); + // The GEP pointer operand is not a pointer, it's a vector of pointers. + if (!PtrTy) + return 0; // getelementptr P -> P. if (Ops.size() == 1) @@ -2255,9 +2558,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef Ops, if (C->isZero()) return Ops[0]; // getelementptr P, N -> P if P points to a type of zero size. 
- if (TD) { + if (Q.TD) { Type *Ty = PtrTy->getElementType(); - if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) + if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0) return Ops[0]; } } @@ -2270,12 +2573,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef Ops, return ConstantExpr::getGetElementPtr(cast(Ops[0]), Ops.slice(1)); } +Value *llvm::SimplifyGEPInst(ArrayRef Ops, const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit); +} + /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we /// can fold the result. If not, this returns null. -Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, - const TargetData *, - const DominatorTree *) { +static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef Idxs, const Query &Q, + unsigned) { if (Constant *CAgg = dyn_cast(Agg)) if (Constant *CVal = dyn_cast(Val)) return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); @@ -2300,8 +2608,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, return 0; } +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef Idxs, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT), + RecursionLimit); +} + /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. -static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { +static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = 0; @@ -2329,67 +2646,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; return CommonValue; } +static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { + if (Constant *C = dyn_cast(Op)) + return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI); + + return 0; +} + +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit); +} //=== Helper functions for higher up the class hierarchy. /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. 
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); + Q, MaxRecurse); case Instruction::Sub: return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); - case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse); - case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); + Q, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); + Q, MaxRecurse); case Instruction::LShr: - return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); case Instruction::AShr: - return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); - case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse); - case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse); + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); default: if (Constant *CLHS = dyn_cast(LHS)) if (Constant *CRHS = dyn_cast(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD); + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD, + Q.TLI); } // If the operation is associative, try some generic simplifications. if (Instruction::isAssociative(Opcode)) - if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT, - MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse)) return V; - // If the operation is with the result of a select instruction, check whether + // If the operation is with the result of a select instruction check whether // operating on either branch of the select always yields the same value. 
if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa(LHS) || isa(RHS)) - if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) return V; return 0; @@ -2397,119 +2724,136 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) - return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); + return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); +} + +static Value *SimplifyCallInst(CallInst *CI, const Query &) { + // call undef -> undef + if (isa(CI->getCalledValue())) + return UndefValue::get(CI->getType()); + + return 0; } /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. 
Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, TD); + Result = ConstantFoldInstruction(I, TD, TLI); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::Mul: - Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FDiv: - Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FRem: - Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), - TD, DT); + TD, TLI, DT); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), - TD, DT); + TD, TLI, DT); break; case Instruction::And: - Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Or: - Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Xor: - Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, DT); + I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, DT); + I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), TD, DT); + I->getOperand(2), TD, TLI, DT); break; case Instruction::GetElementPtr: { SmallVector Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(Ops, TD, DT); + Result = 
SimplifyGEPInst(Ops, TD, TLI, DT); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), TD, DT); + IV->getIndices(), TD, TLI, DT); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast(I), DT); + Result = SimplifyPHINode(cast(I), Query (TD, TLI, DT)); + break; + case Instruction::Call: + Result = SimplifyCallInst(cast(I), Query (TD, TLI, DT)); + break; + case Instruction::Trunc: + Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); break; } @@ -2519,57 +2863,84 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, return Result == I ? UndefValue::get(I->getType()) : Result; } -/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then -/// delete the From instruction. In addition to a basic RAUW, this does a -/// recursive simplification of the newly formed instructions. This catches -/// things where one simplification exposes other opportunities. This only -/// simplifies and deletes scalar operations, it does not change the CFG. +/// \brief Implementation of recursive simplification through an instructions +/// uses. /// -void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, - const TargetData *TD, - const DominatorTree *DT) { - assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); +/// This is the common implementation of the recursive simplification routines. +/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to +/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of +/// instructions to process and attempt to simplify it using +/// InstructionSimplify. +/// +/// This routine returns 'true' only when *it* simplifies something. The passed +/// in simplified value does not count toward this. +static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + bool Simplified = false; + SmallSetVector Worklist; - // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that - // we can know if it gets deleted out from under us or replaced in a - // recursive simplification. - WeakVH FromHandle(From); - WeakVH ToHandle(To); + // If we have an explicit value to collapse to, do that round of the + // simplification loop by hand initially. + if (SimpleV) { + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + if (*UI != I) + Worklist.insert(cast(*UI)); - while (!From->use_empty()) { - // Update the instruction to use the new value. - Use &TheUse = From->use_begin().getUse(); - Instruction *User = cast(TheUse.getUser()); - TheUse = To; + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); - // Check to see if the instruction can be folded due to the operand - // replacement. For example changing (or X, Y) into (or X, -1) can replace - // the 'or' with -1. - Value *SimplifiedVal; - { - // Sanity check to make sure 'User' doesn't dangle across - // SimplifyInstruction. - AssertingVH<> UserHandle(User); - - SimplifiedVal = SimplifyInstruction(User, TD, DT); - if (SimplifiedVal == 0) continue; - } - - // Recursively simplify this user to the new value. 
- ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT); - From = dyn_cast_or_null((Value*)FromHandle); - To = ToHandle; - - assert(ToHandle && "To value deleted by recursive simplification?"); - - // If the recursive simplification ended up revisiting and deleting - // 'From' then we're done. - if (From == 0) - return; + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); + } else { + Worklist.insert(I); } - // If 'From' has value handles referring to it, do a real RAUW to update them. - From->replaceAllUsesWith(To); + // Note that we must test the size on each iteration, the worklist can grow. + for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) { + I = Worklist[Idx]; - From->eraseFromParent(); + // See if this instruction simplifies. + SimpleV = SimplifyInstruction(I, TD, TLI, DT); + if (!SimpleV) + continue; + + Simplified = true; + + // Stash away all the uses of the old instruction so we can check them for + // recursive simplifications after a RAUW. This is cheaper than checking all + // uses of To on the recursive step in most cases. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + Worklist.insert(cast(*UI)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); + } + return Simplified; +} + +bool llvm::recursivelySimplifyInstruction(Instruction *I, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT); +} + +bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); + assert(SimpleV && "Must provide a simplified value."); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT); } diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index f80595c7dbed..5ca2746c9f6a 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -20,20 +20,25 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include #include using namespace llvm; +using namespace PatternMatch; char LazyValueInfo::ID = 0; -INITIALIZE_PASS(LazyValueInfo, "lazy-value-info", +INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) namespace llvm { @@ -61,10 +66,10 @@ class LVILatticeVal { constant, /// notconstant - This Value is known to not have the specified value. notconstant, - + /// constantrange - The Value falls within this range. 
constantrange, - + /// overdefined - This value is not known to be constant, and we know that /// it has a value. overdefined @@ -207,7 +212,7 @@ class LVILatticeVal { // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use TargetData for smarter constant folding. + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. if (ConstantInt *Res = dyn_cast( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getConstant(), @@ -233,7 +238,7 @@ class LVILatticeVal { // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use TargetData for smarter constant folding. + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. if (ConstantInt *Res = dyn_cast( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getNotConstant(), @@ -305,50 +310,6 @@ namespace { }; } -namespace llvm { - template<> - struct DenseMapInfo { - typedef DenseMapInfo PointerInfo; - static inline LVIValueHandle getEmptyKey() { - return LVIValueHandle(PointerInfo::getEmptyKey(), - static_cast(0)); - } - static inline LVIValueHandle getTombstoneKey() { - return LVIValueHandle(PointerInfo::getTombstoneKey(), - static_cast(0)); - } - static unsigned getHashValue(const LVIValueHandle &Val) { - return PointerInfo::getHashValue(Val); - } - static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) { - return LHS == RHS; - } - }; - - template<> - struct DenseMapInfo, Value*> > { - typedef std::pair, Value*> PairTy; - typedef DenseMapInfo > APointerInfo; - typedef DenseMapInfo BPointerInfo; - static inline PairTy getEmptyKey() { - return std::make_pair(APointerInfo::getEmptyKey(), - BPointerInfo::getEmptyKey()); - } - static inline PairTy getTombstoneKey() { - return std::make_pair(APointerInfo::getTombstoneKey(), - BPointerInfo::getTombstoneKey()); - } - static unsigned getHashValue( const PairTy &Val) { - return APointerInfo::getHashValue(Val.first) ^ - BPointerInfo::getHashValue(Val.second); - } - static bool isEqual(const PairTy &LHS, const PairTy &RHS) { - return APointerInfo::isEqual(LHS.first, RHS.first) && - BPointerInfo::isEqual(LHS.second, RHS.second); - } - }; -} - namespace { /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. @@ -360,14 +321,18 @@ namespace { /// ValueCache - This is all of the cached information for all values, /// mapped from Value* to key information. - DenseMap ValueCache; + std::map ValueCache; /// OverDefinedCache - This tracks, on a per-block basis, the set of /// values that are over-defined at the end of that block. This is required /// for cache updating. typedef std::pair, Value*> OverDefinedPairTy; DenseSet OverDefinedCache; - + + /// SeenBlocks - Keep track of all blocks that we have ever seen, so we + /// don't spend time removing unused blocks from our caches. + DenseSet > SeenBlocks; + /// BlockValueStack - This stack holds the state of the value solver /// during a query. It basically emulates the callstack of the naive /// recursive value lookup process. @@ -438,6 +403,7 @@ namespace { /// clear - Empty the cache. void clear() { + SeenBlocks.clear(); ValueCache.clear(); OverDefinedCache.clear(); } @@ -466,6 +432,12 @@ void LVIValueHandle::deleted() { } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + // Shortcut if we have never seen this block. 
+ DenseSet >::iterator I = SeenBlocks.find(BB); + if (I == SeenBlocks.end()) + return; + SeenBlocks.erase(I); + SmallVector ToErase; for (DenseSet::iterator I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) { @@ -477,7 +449,7 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { E = ToErase.end(); I != E; ++I) OverDefinedCache.erase(*I); - for (DenseMap::iterator + for (std::map::iterator I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) I->second.erase(BB); } @@ -505,6 +477,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { if (Constant *VC = dyn_cast(Val)) return LVILatticeVal::get(VC); + SeenBlocks.insert(BB); return lookup(Val)[BB]; } @@ -513,6 +486,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { return true; ValueCacheEntryTy &Cache = lookup(Val); + SeenBlocks.insert(BB); LVILatticeVal &BBLV = Cache[BB]; // OverDefinedCacheUpdater is a helper object that will update @@ -823,9 +797,8 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. ICmpInst *ICI = dyn_cast(BI->getCondition()); - if (ICI && ICI->getOperand(0) == Val && - isa(ICI->getOperand(1))) { - if (ICI->isEquality()) { + if (ICI && isa(ICI->getOperand(1))) { + if (ICI->isEquality() && ICI->getOperand(0) == Val) { // We know that V has the RHS constant if this is a true SETEQ or // false SETNE. if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) @@ -835,12 +808,23 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, return true; } - if (ConstantInt *CI = dyn_cast(ICI->getOperand(1))) { + // Recognize the range checking idiom that InstCombine produces. + // (X-C1) u< C2 --> [C1, C1+C2) + ConstantInt *NegOffset = 0; + if (ICI->getPredicate() == ICmpInst::ICMP_ULT) + match(ICI->getOperand(0), m_Add(m_Specific(Val), + m_ConstantInt(NegOffset))); + + ConstantInt *CI = dyn_cast(ICI->getOperand(1)); + if (CI && (ICI->getOperand(0) == Val || NegOffset)) { // Calculate the range of values that would satisfy the comparison. ConstantRange CmpRange(CI->getValue(), CI->getValue()+1); ConstantRange TrueValues = ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + if (NegOffset) // Apply the offset from above. + TrueValues = TrueValues.subtract(NegOffset->getValue()); + // If we're interested in the false dest, invert the condition. if (!isTrueDest) TrueValues = TrueValues.inverse(); @@ -882,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // BBFrom to BBTo. unsigned NumEdges = 0; ConstantInt *EdgeVal = 0; - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { - if (SI->getSuccessor(i) != BBTo) continue; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + if (i.getCaseSuccessor() != BBTo) continue; if (NumEdges++) break; - EdgeVal = SI->getCaseValue(i); + EdgeVal = i.getCaseValue(); } assert(EdgeVal && "Missing successor?"); if (NumEdges == 1) { @@ -1007,12 +992,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) { bool LazyValueInfo::runOnFunction(Function &F) { if (PImpl) getCache(PImpl).clear(); - + TD = getAnalysisIfAvailable(); + TLI = &getAnalysis(); + // Fully lazy. return false; } +void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} + void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. 
if (PImpl) { @@ -1061,7 +1053,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, // If we know the value is a constant, evaluate the conditional. Constant *Res = 0; if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD, + TLI); if (ConstantInt *ResCI = dyn_cast(Res)) return ResCI->isZero() ? False : True; return Unknown; @@ -1102,13 +1095,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD); + Result.getNotConstant(), C, TD, + TLI); if (Res->isNullValue()) return False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD); + Result.getNotConstant(), C, TD, + TLI); if (Res->isNullValue()) return True; } diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 38d677d502a7..83bdf5286ad7 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -44,6 +44,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/IntrinsicInst.h" @@ -103,6 +104,7 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; std::string Messages; raw_string_ostream MessagesStr; @@ -117,6 +119,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); } virtual void print(raw_ostream &O, const Module *M) const {} @@ -149,6 +152,7 @@ namespace { char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", @@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis(); DT = &getAnalysis(); TD = getAnalysisIfAvailable(); + TLI = &getAnalysis(); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); @@ -411,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I, if (Align != 0) { unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth), - KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD); Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -471,9 +475,8 @@ static bool isZero(Value *V, TargetData *TD) { if (isa(V)) return true; unsigned BitWidth = cast(V->getType())->getBitWidth(); - APInt Mask = APInt::getAllOnesValue(BitWidth), - KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, TD); return KnownZero.isAllOnesValue(); } @@ -614,10 +617,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try 
SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast(V)) { - if (Value *W = SimplifyInstruction(Inst, TD, DT)) + if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast(V)) { - if (Value *W = ConstantFoldConstantExpression(CE, TD)) + if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI)) if (W != V) return findValueImpl(W, OffsetOk, Visited); } diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 0e6bcbfae4f6..873a27543dd6 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Operator.h" using namespace llvm; @@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// MaxInstsToScan specifies the maximum instructions to scan in the block. If /// it is set to 0, it will scan the whole block. You can also optionally /// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA) { + AliasAnalysis *AA, + MDNode **TBAATag) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; // If we're using alias analysis to disambiguate get the size of *Ptr. @@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { + if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); return LI; + } if (StoreInst *SI = dyn_cast(Inst)) { // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { + if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa); return SI->getOperand(0); + } // If Ptr is an alloca and this is a store to a different alloca, ignore // the store. This is a trivial form of alias analysis that is important diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp index 3997ac478b52..463269d9d984 100644 --- a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp @@ -91,8 +91,6 @@ static Value *GetPointerOperand(Value *I) { if (StoreInst *i = dyn_cast(I)) return i->getPointerOperand(); llvm_unreachable("Value is no load or store instruction!"); - // Never reached. 
- return 0; } static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 85aaccaefc37..f7a60a1737d4 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" @@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, // Test if the value is already loop-invariant. if (isLoopInvariant(I)) return true; - if (!I->isSafeToSpeculativelyExecute()) + if (!isSafeToSpeculativelyExecute(I)) return false; if (I->mayReadFromMemory()) return false; @@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const { return 0; } -/// getTripCount - Return a loop-invariant LLVM value indicating the number of -/// times the loop will be executed. Note that this means that the backedge -/// of the loop executes N-1 times. If the trip-count cannot be determined, -/// this returns null. -/// -/// The IndVarSimplify pass transforms loops to have a form that this -/// function easily understands. -/// -Value *Loop::getTripCount() const { - // Canonical loops will end with a 'cmp ne I, V', where I is the incremented - // canonical induction variable and V is the trip count of the loop. - PHINode *IV = getCanonicalInductionVariable(); - if (IV == 0 || IV->getNumIncomingValues() != 2) return 0; - - bool P0InLoop = contains(IV->getIncomingBlock(0)); - Value *Inc = IV->getIncomingValue(!P0InLoop); - BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop); - - if (BranchInst *BI = dyn_cast(BackedgeBlock->getTerminator())) - if (BI->isConditional()) { - if (ICmpInst *ICI = dyn_cast(BI->getCondition())) { - if (ICI->getOperand(0) == Inc) { - if (BI->getSuccessor(0) == getHeader()) { - if (ICI->getPredicate() == ICmpInst::ICMP_NE) - return ICI->getOperand(1); - } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { - return ICI->getOperand(1); - } - } - } - } - - return 0; -} - -/// getSmallConstantTripCount - Returns the trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the trip count is very large -/// (>= 2^32) -unsigned Loop::getSmallConstantTripCount() const { - Value* TripCount = this->getTripCount(); - if (TripCount) { - if (ConstantInt *TripCountC = dyn_cast(TripCount)) { - // Guard against huge trip counts. - if (TripCountC->getValue().getActiveBits() <= 32) { - return (unsigned)TripCountC->getZExtValue(); - } - } - } - return 0; -} - -/// getSmallConstantTripMultiple - Returns the largest constant divisor of the -/// trip count of this loop as a normal unsigned value, if possible. This -/// means that the actual trip count is always a multiple of the returned -/// value (don't forget the trip count could very well be zero as well!). -/// -/// Returns 1 if the trip count is unknown or not guaranteed to be the -/// multiple of a constant (which is also the case if the trip count is simply -/// constant, use getSmallConstantTripCount for that case), Will also return 1 -/// if the trip count is very large (>= 2^32). 
-unsigned Loop::getSmallConstantTripMultiple() const { - Value* TripCount = this->getTripCount(); - // This will hold the ConstantInt result, if any - ConstantInt *Result = NULL; - if (TripCount) { - // See if the trip count is constant itself - Result = dyn_cast(TripCount); - // if not, see if it is a multiplication - if (!Result) - if (BinaryOperator *BO = dyn_cast(TripCount)) { - switch (BO->getOpcode()) { - case BinaryOperator::Mul: - Result = dyn_cast(BO->getOperand(1)); - break; - case BinaryOperator::Shl: - if (ConstantInt *CI = dyn_cast(BO->getOperand(1))) - if (CI->getValue().getActiveBits() <= 5) - return 1u << CI->getZExtValue(); - break; - default: - break; - } - } - } - // Guard against huge trip counts. - if (Result && Result->getValue().getActiveBits() <= 32) { - return (unsigned)Result->getZExtValue(); - } else { - return 1; - } -} - /// isLCSSAForm - Return true if the Loop is in LCSSA form bool Loop::isLCSSAForm(DominatorTree &DT) const { // Sort the blocks vector so that we can use binary search to do quick @@ -297,6 +205,17 @@ bool Loop::isLoopSimplifyForm() const { return getLoopPreheader() && getLoopLatch() && hasDedicatedExits(); } +/// isSafeToClone - Return true if the loop body is safe to clone in practice. +/// Routines that reform the loop CFG and split edges often fail on indirectbr. +bool Loop::isSafeToClone() const { + // Return false if any loop blocks contain indirectbrs. + for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { + if (isa((*I)->getTerminator())) + return false; + } + return true; +} + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { @@ -477,21 +396,19 @@ void UnloopUpdater::updateBlockParents() { /// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below /// their new parents. void UnloopUpdater::removeBlocksFromAncestors() { - // Remove unloop's blocks from all ancestors below their new parents. + // Remove all unloop's blocks (including those in nested subloops) from + // ancestors below the new parent loop. for (Loop::block_iterator BI = Unloop->block_begin(), BE = Unloop->block_end(); BI != BE; ++BI) { - Loop *NewParent = LI->getLoopFor(*BI); - // If this block is an immediate subloop, remove all blocks (including - // nested subloops) from ancestors below the new parent loop. - // Otherwise, if this block is in a nested subloop, skip it. - if (SubloopParents.count(NewParent)) - NewParent = SubloopParents[NewParent]; - else if (Unloop->contains(NewParent)) - continue; - + Loop *OuterParent = LI->getLoopFor(*BI); + if (Unloop->contains(OuterParent)) { + while (OuterParent->getParentLoop() != Unloop) + OuterParent = OuterParent->getParentLoop(); + OuterParent = SubloopParents[OuterParent]; + } // Remove blocks from former Ancestors except Unloop itself which will be // deleted. 
- for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent; + for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent; OldParent = OldParent->getParentLoop()) { assert(OldParent && "new loop is not an ancestor of the original"); OldParent->removeBlockFromLoop(*BI); diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 5ba1f4045d1a..aba700ac5c34 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -14,10 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Timer.h" using namespace llvm; @@ -53,20 +51,6 @@ class PrintLoopPass : public LoopPass { char PrintLoopPass::ID = 0; } -//===----------------------------------------------------------------------===// -// DebugInfoProbe - -static DebugInfoProbeInfo *TheDebugProbe; -static void createDebugInfoProbe() { - if (TheDebugProbe) return; - - // Constructed the first time this is called. This guarantees that the - // object will be constructed, if -enable-debug-info-probe is set, - // before static globals, thus it will be destroyed before them. - static ManagedStatic DIP; - TheDebugProbe = &*DIP; -} - //===----------------------------------------------------------------------===// // LPPassManager // @@ -195,7 +179,6 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { bool LPPassManager::runOnFunction(Function &F) { LI = &getAnalysis(); bool Changed = false; - createDebugInfoProbe(); // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); @@ -227,21 +210,19 @@ bool LPPassManager::runOnFunction(Function &F) { // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); initializeAnalysisImpl(P); - if (TheDebugProbe) - TheDebugProbe->initialize(P, F); + { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); Changed |= P->runOnLoop(CurrentLoop, *this); } - if (TheDebugProbe) - TheDebugProbe->finalize(P, F); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index fde07ea4f98d..22414b36d5a4 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { AliasAnalysis::Location Loc = AA.getLocation(LI); MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); } else if (StoreInst *SI = dyn_cast(Inst)) { - if (!LI->isUnordered()) { + if (!SI->isUnordered()) { // FIXME: Handle atomic/volatile stores. 
Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), static_cast(0))); diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 8d451c46f9b0..b145650b0f0a 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -48,10 +48,10 @@ static bool isMallocCall(const CallInst *CI) { // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); - if (FTy->getNumParams() != 1) - return false; - return FTy->getParamType(0)->isIntegerTy(32) || - FTy->getParamType(0)->isIntegerTy(64); + return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && + FTy->getNumParams() == 1 && + (FTy->getParamType(0)->isIntegerTy(32) || + FTy->getParamType(0)->isIntegerTy(64)); } /// extractMallocCall - Returns the corresponding CallInst if the instruction diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 92967c08dc21..3a544f35d502 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -91,6 +92,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis(); TD = getAnalysisIfAvailable(); + DT = getAnalysisIfAvailable(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); return false; @@ -321,14 +323,100 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, !TD.fitsInLegalInteger(NewLoadByteSize*8)) return 0; + if (LIOffs+NewLoadByteSize > MemLocEnd && + LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) { + // We will be reading past the location accessed by the original program. + // While this is safe in a regular build, Address Safety analysis tools + // may start reporting false warnings. So, don't do widening. + return 0; + } + // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; NewLoadByteSize <<= 1; } - - return 0; +} + +namespace { + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. 
+ struct CapturesBefore : public CaptureTracker { + CapturesBefore(const Instruction *I, DominatorTree *DT) + : BeforeHere(I), DT(DT), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Instruction *I = cast(U->getUser()); + BasicBlock *BB = I->getParent(); + if (BeforeHere != I && + (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + return false; + return true; + } + + bool captured(Use *U) { + Instruction *I = cast(U->getUser()); + BasicBlock *BB = I->getParent(); + if (BeforeHere != I && + (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool Captured; + }; +} + +AliasAnalysis::ModRefResult +MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, + const AliasAnalysis::Location &MemLoc) { + AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + if (MR != AliasAnalysis::ModRef) return MR; + + // FIXME: this is really just shoring-up a deficiency in alias analysis. + // BasicAA isn't willing to spend linear time determining whether an alloca + // was captured before or after this particular call, while we are. However, + // with a smarter AA in place, this test is just wasting compile time. + if (!DT) return AliasAnalysis::ModRef; + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + if (!isIdentifiedObject(Object) || isa(Object)) + return AliasAnalysis::ModRef; + ImmutableCallSite CS(Inst); + if (!CS.getInstruction()) return AliasAnalysis::ModRef; + + CapturesBefore CB(Inst, DT); + llvm::PointerMayBeCaptured(Object, &CB); + + if (isa(Object) || CS.getInstruction() == Object || CB.Captured) + return AliasAnalysis::ModRef; + + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. + if (!(*CI)->getType()->isPointerTy() || + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!AA->isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) { + return AliasAnalysis::ModRef; + } + } + return AliasAnalysis::NoModRef; } /// getPointerDependencyFrom - Return the instruction on which a memory @@ -478,7 +566,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - switch (AA->getModRefInfo(Inst, MemLoc)) { + switch (getModRefInfo(Inst, MemLoc)) { case AliasAnalysis::NoModRef: // If the call has no effect on the queried pointer, just ignore it. 
continue; diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index 7e22ddc61c09..38cb1c91f8f8 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" @@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) { return true; if (isa(Inst) && - Inst->isSafeToSpeculativelyExecute()) + isSafeToSpeculativelyExecute(Inst)) return true; if (Inst->getOpcode() == Instruction::Add && @@ -73,7 +74,6 @@ static bool VerifySubExpr(Value *Expr, errs() << *I << '\n'; llvm_unreachable("Either something is missing from InstInputs or " "CanPHITrans is wrong."); - return false; } // Validate the operands of the instruction. @@ -100,7 +100,6 @@ bool PHITransAddr::Verify() const { for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; llvm_unreachable("This is unexpected."); - return false; } // a-ok. @@ -186,7 +185,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (CastInst *Cast = dyn_cast(Inst)) { - if (!Cast->isSafeToSpeculativelyExecute()) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return 0; Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); if (PHIIn == 0) return 0; if (PHIIn == Cast->getOperand(0)) @@ -228,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -284,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -381,7 +380,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // Handle cast of PHI translatable value. 
if (CastInst *Cast = dyn_cast(Inst)) { - if (!Cast->isSafeToSpeculativelyExecute()) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return 0; Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); if (OpVal == 0) return 0; diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp index 0e3b6e69ce34..80c5222a27a5 100644 --- a/contrib/llvm/lib/Analysis/PathNumbering.cpp +++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp @@ -386,8 +386,8 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { } TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); - if(isa(terminator) || isa(terminator) - || isa(terminator) || isa(terminator)) + if(isa(terminator) || isa(terminator) || + isa(terminator)) addEdge(currentNode, getExit(),0); currentNode->setColor(BallLarusNode::GRAY); diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp index 0ae734e259db..0fcdfe75aefd 100644 --- a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp +++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp @@ -137,22 +137,22 @@ bool PathProfileVerifier::runOnModule (Module &M) { BasicBlock* source = nextEdge->getSource(); BasicBlock* target = nextEdge->getTarget(); unsigned duplicateNumber = nextEdge->getDuplicateNumber(); - DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber - << "}--> " << target->getNameStr()); + DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber + << "}--> " << target->getName()); // Ensure all the referenced edges exist // TODO: make this a separate function if( !arrayMap.count(source) ) { - errs() << " error [" << F->getNameStr() << "()]: source '" - << source->getNameStr() + errs() << " error [" << F->getName() << "()]: source '" + << source->getName() << "' does not exist in the array map.\n"; } else if( !arrayMap[source].count(target) ) { - errs() << " error [" << F->getNameStr() << "()]: target '" - << target->getNameStr() + errs() << " error [" << F->getName() << "()]: target '" + << target->getName() << "' does not exist in the array map.\n"; } else if( !arrayMap[source][target].count(duplicateNumber) ) { - errs() << " error [" << F->getNameStr() << "()]: edge " - << source->getNameStr() << " -> " << target->getNameStr() + errs() << " error [" << F->getName() << "()]: edge " + << source->getName() << " -> " << target->getName() << " duplicate number " << duplicateNumber << " does not exist in the array map.\n"; } else { diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp index b594e2ba5506..63468f842612 100644 --- a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp @@ -332,7 +332,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { // Clear Minimal Edges. MinimalWeight.clear(); - DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n"); + DEBUG(dbgs() << "Working on function " << F.getName() << "\n"); // Since the entry block is the first one and has no predecessors, the edge // (0,entry) is inserted with the starting weight of 1. 
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp index 098079bcffc4..c4da8079a511 100644 --- a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -160,7 +160,7 @@ bool LoaderPass::runOnModule(Module &M) { ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); readEdge(getEdge(0,&F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -181,7 +181,7 @@ bool LoaderPass::runOnModule(Module &M) { ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); readEdge(getEdge(0,&F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp index a01751849c51..0cb158865afe 100644 --- a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp @@ -30,7 +30,7 @@ static cl::opt ProfileVerifierDisableAssertions("profile-verifier-noassert", cl::desc("Disable assertions")); -namespace llvm { +namespace { template class ProfileVerifierPassT : public FunctionPass { @@ -125,8 +125,8 @@ namespace llvm { outCount++; } } - dbgs() << "Block " << BB->getNameStr() << " in " - << BB->getParent()->getNameStr() << ":" + dbgs() << "Block " << BB->getName() << " in " + << BB->getParent()->getName() << ":" << "BBWeight=" << format("%20.20g",BBWeight) << "," << "inWeight=" << format("%20.20g",inWeight) << "," << "inCount=" << inCount << "," @@ -143,8 +143,8 @@ namespace llvm { template void ProfileVerifierPassT::debugEntry (DetailedBlockInfo *DI) { - dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " - << DI->BB->getParent()->getNameStr() << ":" + dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in " + << DI->BB->getParent()->getName() << ":" << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," << "inWeight=" << format("%20.20g",DI->inWeight) << "," << "inCount=" << DI->inCount << "," @@ -201,13 +201,13 @@ namespace llvm { double EdgeWeight = PI->getEdgeWeight(E); if (EdgeWeight == ProfileInfoT::MissingValue) { dbgs() << "Edge " << E << " in Function " - << ProfileInfoT::getFunction(E)->getNameStr() << ": "; + << ProfileInfoT::getFunction(E)->getName() << ": "; ASSERTMESSAGE("Edge has missing value"); return 0; } else { if (EdgeWeight < 0) { dbgs() << "Edge " << E << " in Function " - << ProfileInfoT::getFunction(E)->getNameStr() << ": "; + << ProfileInfoT::getFunction(E)->getName() << ": "; ASSERTMESSAGE("Edge has negative value"); } return EdgeWeight; @@ -220,8 +220,8 @@ namespace llvm { DetailedBlockInfo *DI) { if (Error) { DEBUG(debugEntry(DI)); - dbgs() << "Block " << DI->BB->getNameStr() << " in Function " - << DI->BB->getParent()->getNameStr() << ": "; + dbgs() << "Block " << DI->BB->getName() << " in Function " + << DI->BB->getParent()->getName() << ": "; ASSERTMESSAGE(Message); } return; diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 
52753cbe85af..b507b1e340f5 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -186,18 +186,16 @@ std::string Region::getNameStr() const { raw_string_ostream OS(entryName); WriteAsOperand(OS, getEntry(), false); - entryName = OS.str(); } else - entryName = getEntry()->getNameStr(); + entryName = getEntry()->getName(); if (getExit()) { if (getExit()->getName().empty()) { raw_string_ostream OS(exitName); WriteAsOperand(OS, getExit(), false); - exitName = OS.str(); } else - exitName = getExit()->getNameStr(); + exitName = getExit()->getName(); } else exitName = ""; @@ -652,7 +650,7 @@ void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { // This basic block is a start block of a region. It is already in the // BBtoRegion relation. Only the child basic blocks have to be updated. if (it != BBtoRegion.end()) { - Region *newRegion = it->second;; + Region *newRegion = it->second; region->addSubRegion(getTopMostParent(newRegion)); region = newRegion; } else { diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index e0ac56c65e76..1d55642079a0 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -74,6 +74,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) char ScalarEvolution::ID = 0; @@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const { OS << OpStr; } OS << ")"; + switch (NAry->getSCEVType()) { + case scAddExpr: + case scMulExpr: + if (NAry->getNoWrapFlags(FlagNUW)) + OS << ""; + if (NAry->getNoWrapFlags(FlagNSW)) + OS << ""; + } return; } case scUDivExpr: { @@ -249,11 +259,9 @@ Type *SCEV::getType() const { return cast(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return 0; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return 0; } bool SCEV::isZero() const { @@ -274,6 +282,20 @@ bool SCEV::isAllOnesValue() const { return false; } +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +bool SCEV::isNonConstantNegative() const { + const SCEVMulExpr *Mul = dyn_cast(this); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). 
+ return SC->getValue()->getValue().isNegative(); +} + SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} @@ -587,11 +609,8 @@ namespace { } default: - break; + llvm_unreachable("Unknown SCEV kind!"); } - - llvm_unreachable("Unknown SCEV kind!"); - return 0; } }; } @@ -2581,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2590,7 +2609,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2607,7 +2626,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2617,7 +2636,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -3108,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, TD, DT)) + if (Value *V = SimplifyInstruction(PN, TD, TLI, DT)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3168,7 +3187,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // Add the total offset from all the GEP indices to the base. return getAddExpr(BaseS, TotalOffset, - isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); + isInBounds ? SCEV::FlagNUW : SCEV::FlagAnyWrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3242,9 +3261,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + ComputeMaskedBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3382,9 +3400,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. 
- APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + ComputeMaskedBits(U->getValue(), Zeros, Ones, TD); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3584,6 +3601,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. + // + // Don't apply this instruction's NSW or NUW flags to the new + // expression. The instruction may be guarded by control flow that the + // no-wrap behavior depends on. Non-control-equivalent instructions can be + // mapped to the same SCEV expression, and it would be incorrect to transfer + // NSW/NUW semantics to those operations. SmallVector AddOps; AddOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { @@ -3598,16 +3621,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { AddOps.push_back(Op1); } AddOps.push_back(getSCEV(U->getOperand(0))); - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - OverflowingBinaryOperator *OBO = cast(V); - if (OBO->hasNoSignedWrap()) - setFlags(Flags, SCEV::FlagNSW); - if (OBO->hasNoUnsignedWrap()) - setFlags(Flags, SCEV::FlagNUW); - return getAddExpr(AddOps, Flags); + return getAddExpr(AddOps); } case Instruction::Mul: { - // See the Add code above. + // Don't transfer NSW/NUW for the same reason as AddExpr. SmallVector MulOps; MulOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); @@ -3641,9 +3658,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); @@ -3915,13 +3931,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the maximum trip count is very large -/// (>= 2^32) -unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, - BasicBlock *ExitBlock) { +/// normal unsigned value. Returns 0 if the trip count is unknown or not +/// constant. Will also return 0 if the maximum trip count is very large (>= +/// 2^32). +/// +/// This "trip count" assumes that control exits via ExitingBlock. More +/// precisely, it is the number of times that control may reach ExitingBlock +/// before taking the branch. For loops with multiple exits, it may not be the +/// number times that the loop header executes because the loop may exit +/// prematurely via another branch. 
+unsigned ScalarEvolution:: +getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { const SCEVConstant *ExitCount = - dyn_cast(getExitCount(L, ExitBlock)); + dyn_cast(getExitCount(L, ExitingBlock)); if (!ExitCount) return 0; @@ -3944,9 +3966,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, /// multiple of a constant (which is also the case if the trip count is simply /// constant, use getSmallConstantTripCount for that case), Will also return 1 /// if the trip count is very large (>= 2^32). -unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, - BasicBlock *ExitBlock) { - const SCEV *ExitCount = getExitCount(L, ExitBlock); +/// +/// As explained in the comments for getSmallConstantTripCount, this assumes +/// that control exits the loop via ExitingBlock. +unsigned ScalarEvolution:: +getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { + const SCEV *ExitCount = getExitCount(L, ExitingBlock); if (ExitCount == getCouldNotCompute()) return 1; @@ -4153,13 +4178,19 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. If all exits are computable, this is the minimum computed count. +/// exits. A computable result can only be return for loops with a single exit. +/// Returning the minimum taken count among all exits is incorrect because one +/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that +/// the limit of each loop test is never skipped. This is a valid assumption as +/// long as the loop exits via that test. For precise results, it is the +/// caller's responsibility to specify the relevant loop exit using +/// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); - // We need at least one computable exit. + // We need exactly one computable exit. if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); @@ -4171,8 +4202,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!BECount) BECount = ENT->ExactNotTaken; - else - BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken); + else if (BECount != ENT->ExactNotTaken) + return SE->getCouldNotCompute(); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; @@ -4253,8 +4284,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (MaxBECount == getCouldNotCompute()) MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max); + else if (EL.Max != getCouldNotCompute()) { + // We cannot take the "min" MaxBECount, because non-unit stride loops may + // skip some loop tests. Taking the max over the exits is sufficiently + // conservative. TODO: We could do better taking into consideration + // that (1) the loop has unit stride (2) the last loop test is + // less-than/greater-than (3) any loop test is less-than/greater-than AND + // falls-through some constant times less then the other tests. 
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + } } return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); @@ -4539,40 +4577,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast(Val)->getValue(); } -/// GetAddressedElementFromGlobal - Given a global variable with an initializer -/// and a GEP expression (missing the pointer index) indexing into it, return -/// the addressed element of the initializer or null if the index expression is -/// invalid. -static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, - const std::vector &Indices) { - Constant *Init = GV->getInitializer(); - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - uint64_t Idx = Indices[i]->getZExtValue(); - if (ConstantStruct *CS = dyn_cast(Init)) { - assert(Idx < CS->getNumOperands() && "Bad struct index!"); - Init = cast(CS->getOperand(Idx)); - } else if (ConstantArray *CA = dyn_cast(Init)) { - if (Idx >= CA->getNumOperands()) return 0; // Bogus program - Init = cast(CA->getOperand(Idx)); - } else if (isa(Init)) { - if (StructType *STy = dyn_cast(Init->getType())) { - assert(Idx < STy->getNumElements() && "Bad struct index!"); - Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (ArrayType *ATy = dyn_cast(Init->getType())) { - if (Idx >= ATy->getNumElements()) return 0; // Bogus program - Init = Constant::getNullValue(ATy->getElementType()); - } else { - llvm_unreachable("Unknown constant aggregate type!"); - } - return 0; - } else { - return 0; // Unknown initializer type - } - } - return Init; -} - /// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. @@ -4600,7 +4604,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Okay, we allow one non-constant index into the GEP instruction. Value *VarIdx = 0; - std::vector Indexes; + std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { @@ -4612,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Indexes.push_back(0); } + // Loop-invariant loads may be a byproduct of loop optimization. Skip them. + if (!VarIdx) + return getCouldNotCompute(); + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); @@ -4634,7 +4642,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), + Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. @@ -4658,7 +4667,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( /// specified type, assuming that all operands were constants. static bool CanConstantFold(const Instruction *I) { if (isa(I) || isa(I) || - isa(I) || isa(I) || isa(I)) + isa(I) || isa(I) || isa(I) || + isa(I)) return true; if (const CallInst *CI = dyn_cast(I)) @@ -4748,16 +4758,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// reason, return null. 
static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap &Vals, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast(V)) return C; + Instruction *I = dyn_cast(V); + if (!I) return 0; - Instruction *I = cast(V); if (Constant *C = Vals.lookup(I)) return C; - assert(!isa(I) && "loop header phis should be mapped to constant"); - assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop"); - (void)L; + // An instruction inside the loop depends on a value outside the loop that we + // weren't given a mapping for, or a value such as a call inside the loop. + if (!canConstantEvolve(I, L)) return 0; + + // An unmapped PHI can be due to a branch or another loop inside this loop, + // or due to this not being the initial iteration through a loop where we + // couldn't compute the evolution of this particular PHI last time. + if (isa(I)) return 0; std::vector Operands(I->getNumOperands()); @@ -4768,16 +4785,21 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, if (!Operands[i]) return 0; continue; } - Constant *C = EvaluateExpression(Operand, L, Vals, TD); + Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI); Vals[Operand] = C; if (!C) return 0; Operands[i] = C; } - if (const CmpInst *CI = dyn_cast(I)) + if (CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], TD); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); + Operands[1], TD, TLI); + if (LoadInst *LI = dyn_cast(I)) { + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Operands[0], TD); + } + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD, + TLI); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -4798,23 +4820,26 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; - // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis. DenseMap CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); // Since the loop is canonicalized, the PHI node must have two entries. One // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) - return RetVal = 0; // Must be a constant. - CurrentIterVals[PN] = StartCST; + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast(I)); ++I) { + Constant *StartCST = + dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return RetVal = 0; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa(BEValue)) - return RetVal = 0; // Not derived from same PHI. // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) @@ -4826,15 +4851,46 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! - // Compute the value of the PHI node for the next iteration. 
+ // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. - Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - if (NextPHI == CurrentIterVals[PN]) - return RetVal = NextPHI; // Stopped evolving! + DenseMap NextIterVals; + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, + TLI); if (NextPHI == 0) return 0; // Couldn't evaluate! - DenseMap NextIterVals; NextIterVals[PN] = NextPHI; + + bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; + + // Also evaluate the other PHI nodes. However, we don't get to stop if we + // cease to be able to evaluate one of them or if they stop evolving, + // because that doesn't necessarily prevent us from computing PN. + SmallVector, 8> PHIsToCompute; + for (DenseMap::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast(I->first); + if (!PHI || PHI == PN || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + } + // We use two distinct loops because EvaluateExpression may invalidate any + // iterators into CurrentIterVals. + for (SmallVectorImpl >::const_iterator + I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = I->first; + Constant *&NextPHI = NextIterVals[PHI]; + if (!NextPHI) { // Not already computed. + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + } + if (NextPHI != I->second) + StoppedEvolving = false; + } + + // If all entries in CurrentIterVals == NextIterVals then we can stop + // iterating, the loop can't continue to change. + if (StoppedEvolving) + return RetVal = CurrentIterVals[PN]; + CurrentIterVals.swap(NextIterVals); } } @@ -4844,9 +4900,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot /// evaluate the trip count of the loop, return getCouldNotCompute(). -const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, - Value *Cond, - bool ExitWhen) { +const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, + Value *Cond, + bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return getCouldNotCompute(); @@ -4854,29 +4910,33 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + DenseMap CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. - - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa(BEValue)) - return getCouldNotCompute(); // Not derived from same PHI. 
+ PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast(I)); ++I) { + Constant *StartCST = + dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return getCouldNotCompute(); // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned IterationNum = 0; + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (Constant *PHIVal = StartCST; - IterationNum != MaxIterations; ++IterationNum) { - DenseMap PHIValMap; - PHIValMap[PN] = PHIVal; + for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, PHIValMap, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, CurrentIterVals, + TD, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4886,11 +4946,29 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); - if (NextPHI == 0 || NextPHI == PHIVal) - return getCouldNotCompute();// Couldn't evaluate or not making progress... - PHIVal = NextPHI; + // Update all the PHI nodes for the next iteration. + DenseMap NextIterVals; + + // Create a list of which PHIs we need to compute. We want to do this before + // calling EvaluateExpression on them because that may invalidate iterators + // into CurrentIterVals. + SmallVector PHIsToCompute; + for (DenseMap::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast(I->first); + if (!PHI || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(PHI); + } + for (SmallVectorImpl::const_iterator I = PHIsToCompute.begin(), + E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = *I; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! + + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); + } + CurrentIterVals.swap(NextIterVals); } // Too many iterations were needed to evaluate. @@ -4921,6 +4999,98 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { return C; } +/// This builds up a Constant using the ConstantExpr interface. That way, we +/// will return Constants for objects which aren't represented by a +/// SCEVConstant, because SCEVConstant is restricted to ConstantInt. +/// Returns NULL if the SCEV isn't representable as a Constant. +static Constant *BuildConstantFromSCEV(const SCEV *V) { + switch (V->getSCEVType()) { + default: // TODO: smax, umax. 
+ case scCouldNotCompute: + case scAddRecExpr: + break; + case scConstant: + return cast(V)->getValue(); + case scUnknown: + return dyn_cast(cast(V)->getValue()); + case scSignExtend: { + const SCEVSignExtendExpr *SS = cast(V); + if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) + return ConstantExpr::getSExt(CastOp, SS->getType()); + break; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *SZ = cast(V); + if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) + return ConstantExpr::getZExt(CastOp, SZ->getType()); + break; + } + case scTruncate: { + const SCEVTruncateExpr *ST = cast(V); + if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) + return ConstantExpr::getTrunc(CastOp, ST->getType()); + break; + } + case scAddExpr: { + const SCEVAddExpr *SA = cast(V); + if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { + if (C->getType()->isPointerTy()) + C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext())); + for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); + if (!C2) return 0; + + // First pointer! + if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + std::swap(C, C2); + // The offsets have been converted to bytes. We can add bytes to an + // i8* by GEP with the byte count in the first index. + C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext())); + } + + // Don't bother trying to sum two pointers. We probably can't + // statically compute a load that results from it anyway. + if (C2->getType()->isPointerTy()) + return 0; + + if (C->getType()->isPointerTy()) { + if (cast(C->getType())->getElementType()->isStructTy()) + C2 = ConstantExpr::getIntegerCast( + C2, Type::getInt32Ty(C->getContext()), true); + C = ConstantExpr::getGetElementPtr(C, C2); + } else + C = ConstantExpr::getAdd(C, C2); + } + return C; + } + break; + } + case scMulExpr: { + const SCEVMulExpr *SM = cast(V); + if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { + // Don't bother with pointers at all. 
+ if (C->getType()->isPointerTy()) return 0; + for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); + if (!C2 || C2->getType()->isPointerTy()) return 0; + C = ConstantExpr::getMul(C, C2); + } + return C; + } + break; + } + case scUDivExpr: { + const SCEVUDivExpr *SU = cast(V); + if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) + if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) + if (LHS->getType() == RHS->getType()) + return ConstantExpr::getUDiv(LHS, RHS); + break; + } + } + return 0; +} + const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (isa(V)) return V; @@ -4973,11 +5143,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { const SCEV *OpV = getSCEVAtScope(OrigV, L); MadeImprovement |= OrigV != OpV; - Constant *C = 0; - if (const SCEVConstant *SC = dyn_cast(OpV)) - C = SC->getValue(); - if (const SCEVUnknown *SU = dyn_cast(OpV)) - C = dyn_cast(SU->getValue()); + Constant *C = BuildConstantFromSCEV(OpV); if (!C) return V; if (C->getType() != Op->getType()) C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, @@ -4992,10 +5158,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C = 0; if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD); - else + Operands[0], Operands[1], TD, + TLI); + else if (const LoadInst *LI = dyn_cast(I)) { + if (!LI->isVolatile()) + C = ConstantFoldLoadFromConstPtr(Operands[0], TD); + } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, TD); + Operands, TD, TLI); if (!C) return V; return getSCEV(C); } @@ -5113,7 +5283,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { } llvm_unreachable("Unknown SCEV type!"); - return 0; } /// getSCEVAtScope - This is a convenience function which does @@ -5350,10 +5519,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // behavior. Loops must exhibit defined behavior until a wrapped value is // actually used. So the trip count computed by udiv could be smaller than the // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { // FIXME: We really want an "isexact" bit for udiv. return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - + } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), @@ -5744,7 +5913,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); - break; case ICmpInst::ICMP_SGT: Pred = ICmpInst::ICMP_SLT; std::swap(LHS, RHS); @@ -6089,8 +6257,9 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) : - AddRec->getNoWrapFlags(SCEV::FlagNUW); + bool NoWrap = isSigned ? 
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) : + AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW)); if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); @@ -6381,6 +6550,7 @@ bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis(); TD = getAnalysisIfAvailable(); + TLI = &getAnalysis(); DT = &getAnalysis(); return false; } @@ -6417,6 +6587,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); + AU.addRequired(); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { @@ -6592,11 +6763,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return LoopVariant; - default: break; + default: llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return LoopVariant; } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { @@ -6678,11 +6846,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return DoesNotDominateBlock; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return DoesNotDominateBlock; } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { @@ -6728,11 +6894,9 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return false; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return false; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return false; } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 47f0f321161b..69507beeaae9 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -19,6 +19,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -30,6 +31,19 @@ using namespace llvm; Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { + // This function must be called with the builder having a valid insertion + // point. It doesn't need to be the actual IP where the uses of the returned + // cast will be added, but it must dominate such IP. + // We use this precondition to produce a cast that will dominate all its + // uses. In particular, this is crucial for the case where the builder's + // insertion point *is* the point where we were asked to put the cast. + // Since we don't know the the builder's insertion point is actually + // where the uses will be added (only that it dominates it), we are + // not allowed to move it. + BasicBlock::iterator BIP = Builder.GetInsertPoint(); + + Instruction *Ret = NULL; + // Check to see if there is already a cast! 
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { @@ -37,27 +51,35 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, if (U->getType() == Ty) if (CastInst *CI = dyn_cast(U)) if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, fix it. - if (BasicBlock::iterator(CI) != IP) { + // If the cast isn't where we want it, create a new cast at IP. + // Likewise, do not reuse a cast at BIP because it must dominate + // instructions that might be inserted before BIP. + if (BasicBlock::iterator(CI) != IP || BIP == IP) { // Create a new cast, and leave the old cast in place in case // it is being used as an insert point. Clear its operand // so that it doesn't hold anything live. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); + Ret = CastInst::Create(Op, V, Ty, "", IP); + Ret->takeName(CI); + CI->replaceAllUsesWith(Ret); CI->setOperand(0, UndefValue::get(V->getType())); - rememberInstruction(NewCI); - return NewCI; + break; } - rememberInstruction(CI); - return CI; + Ret = CI; + break; } } // Create a new cast. - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); - rememberInstruction(I); - return I; + if (!Ret) + Ret = CastInst::Create(Op, V, Ty, V->getName(), IP); + + // We assert at the end of the function since IP might point to an + // instruction with different dominance properties than a cast + // (an invoke for example) and not dominate BIP (but the cast does). + assert(SE.DT->dominates(Ret, BIP)); + + rememberInstruction(Ret); + return Ret; } /// InsertNoopCastOfTo - Insert a cast of V to the specified type, @@ -73,9 +95,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { "InsertNoopCastOfTo cannot change sizes!"); // Short-circuit unnecessary bitcasts. - if (Op == Instruction::BitCast && V->getType() == Ty) - return V; - + if (Op == Instruction::BitCast) { + if (V->getType() == Ty) + return V; + if (CastInst *CI = dyn_cast(V)) { + if (CI->getOperand(0)->getType() == Ty) + return CI->getOperand(0); + } + } // Short-circuit unnecessary inttoptr<->ptrtoint casts. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { @@ -115,8 +142,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast(I)) IP = II->getNormalDest()->begin(); - while (isa(IP) || isa(IP) || - isa(IP)) + while (isa(IP) || isa(IP)) ++IP; return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -492,6 +518,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, V = InsertNoopCastOfTo(V, Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + assert(!isa(V) || + SE.DT->dominates(cast(V), Builder.GetInsertPoint())); + // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); @@ -588,20 +617,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, return expand(SE.getAddExpr(Ops)); } -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *F) { - const SCEVMulExpr *Mul = dyn_cast(F); - if (!Mul) return false; - - // If there is a constant factor, it will be first. - const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). 
- return SC->getValue()->getValue().isNegative(); -} - /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for /// SCEV expansion. If they are nested, this is the most nested. If they are /// neighboring, pick the later. @@ -655,7 +670,6 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { return RelevantLoops[D] = Result; } llvm_unreachable("Unexpected SCEV type!"); - return 0; } namespace { @@ -680,10 +694,10 @@ class LoopCompare { // If one operand is a non-constant negative and the other is not, // put the non-constant negative on the right so that a sub can // be used instead of a negate and add. - if (isNonConstantNegative(LHS.second)) { - if (!isNonConstantNegative(RHS.second)) + if (LHS.second->isNonConstantNegative()) { + if (!RHS.second->isNonConstantNegative()) return false; - } else if (isNonConstantNegative(RHS.second)) + } else if (RHS.second->isNonConstantNegative()) return true; // Otherwise they are equivalent according to this comparison. @@ -744,7 +758,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { for (++I; I != E && I->first == CurLoop; ++I) NewOps.push_back(I->second); Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); - } else if (isNonConstantNegative(Op)) { + } else if (Op->isNonConstantNegative()) { // Instead of doing a negate and add, just do a subtract. Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); Sum = InsertNoopCastOfTo(Sum, Ty); @@ -875,6 +889,94 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, return isNormalAddRecExprPHI(PN, IncV, L); } +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos. +/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return NULL; + + switch (IncV->getOpcode()) { + default: + return NULL; + // Check for a simple Add/Sub or GEP of a loop invariant step. + case Instruction::Add: + case Instruction::Sub: { + Instruction *OInst = dyn_cast(IncV->getOperand(1)); + if (!OInst || SE.DT->dominates(OInst, InsertPos)) + return dyn_cast(IncV->getOperand(0)); + return NULL; + } + case Instruction::BitCast: + return dyn_cast(IncV->getOperand(0)); + case Instruction::GetElementPtr: + for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); + I != E; ++I) { + if (isa(*I)) + continue; + if (Instruction *OInst = dyn_cast(*I)) { + if (!SE.DT->dominates(OInst, InsertPos)) + return NULL; + } + if (allowScale) { + // allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. 
+ if (IncV->getNumOperands() != 2) + return NULL; + unsigned AS = cast(IncV->getType())->getAddressSpace(); + if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) + && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) + return NULL; + break; + } + return dyn_cast(IncV->getOperand(0)); + } +} + +/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos. +bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { + if (SE.DT->dominates(IncV, InsertPos)) + return true; + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector IVIncs; + for(;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT->dominates(IncV, InsertPos)) + break; + } + for (SmallVectorImpl::reverse_iterator I = IVIncs.rbegin(), + E = IVIncs.rend(); I != E; ++I) { + (*I)->moveBefore(InsertPos); + } + return true; +} + /// Determine if this cyclic phi is in a form that would have been generated by /// LSR. We don't care if the phi was actually expanded in this pass, as long /// as it is in a low-cost form, for example, no implied multiplication. This @@ -882,51 +984,43 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, /// expandAddtoGEP. bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L) { - switch (IncV->getOpcode()) { - // Check for a simple Add/Sub or GEP of a loop invariant step. - case Instruction::Add: - case Instruction::Sub: - return IncV->getOperand(0) == PN - && L->isLoopInvariant(IncV->getOperand(1)); - case Instruction::BitCast: - IncV = dyn_cast(IncV->getOperand(0)); - if (!IncV) - return false; - // fall-thru to GEP handling - case Instruction::GetElementPtr: { - // This must be a pointer addition of constants (pretty) or some number of - // address-size elements (ugly). - for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); - I != E; ++I) { - if (isa(*I)) - continue; - // ugly geps have 2 operands. - // i1* is used by the expander to represent an address-size element. - if (IncV->getNumOperands() != 2) - return false; - unsigned AS = cast(IncV->getType())->getAddressSpace(); - if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) - && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return false; - // Ensure the operands dominate the insertion point. I don't know of a - // case when this would not be true, so this is somewhat untested. - if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) - return false; - } - break; + for(Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; + } + return false; +} + +/// expandIVInc - Expand an IV increment at Builder's current InsertPos. 
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType()); + rememberInstruction(IncV); } - IncV = dyn_cast(IncV->getOperand(0)); - if (IncV && IncV->getOpcode() == Instruction::BitCast) - IncV = dyn_cast(IncV->getOperand(0)); - return IncV == PN; - } - default: - return false; + } else { + IncV = useSubtract ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); } + return IncV; } /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand @@ -956,26 +1050,28 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (LSRMode) { if (!isExpandedAddRecExprPHI(PN, IncV, L)) continue; + if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos)) + continue; } else { if (!isNormalAddRecExprPHI(PN, IncV, L)) continue; + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast(IncV->getOperand(0)); + } while (IncV != PN); } // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); // Remember the increment. rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast(IncV->getOperand(0)); - } while (IncV != PN); return PN; } } @@ -984,6 +1080,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + // Another AddRec may need to be recursively expanded below. For example, if + // this AddRec is quadratic, the StepV may itself be an AddRec in this + // loop. Remove this loop from the PostIncLoops set before expanding such + // AddRecs. Otherwise, we cannot find a valid position for the step + // (i.e. StepV can never dominate its loop header). Ideally, we could do + // SavedIncLoops.swap(PostIncLoops), but we generally have a single element, + // so it's not worth implementing SmallPtrSet::swap. + PostIncLoopSet SavedPostIncLoops = PostIncLoops; + PostIncLoops.clear(); + // Expand code for the start value. 
Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, L->getHeader()->begin()); @@ -993,16 +1099,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, SE.DT->properlyDominates(cast(StartV)->getParent(), L->getHeader())); - // Expand code for the step value. Insert instructions right before the - // terminator corresponding to the back-edge. Do this before creating the PHI - // so that PHI reuse code doesn't see an incomplete PHI. If the stride is - // negative, insert a sub instead of an add for the increment (unless it's a - // constant, because subtracts of constants are canonicalized to adds). + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. const SCEV *Step = Normalized->getStepRecurrence(SE); - bool isPointer = ExpandTy->isPointerTy(); - bool isNegative = !isPointer && isNonConstantNegative(Step); - if (isNegative) + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); // Create the PHI. @@ -1023,33 +1129,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, continue; } - // Create a step value and add it to the PHI. If IVIncInsertLoop is - // non-null and equal to the addrec's loop, insert the instructions - // at IVIncInsertPos. + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. Instruction *InsertPos = L == IVIncInsertLoop ? IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); - Value *IncV; - // If the PHI is a pointer, use a GEP, otherwise use an add or sub. - if (isPointer) { - PointerType *GEPPtrTy = cast(ExpandTy); - // If the step isn't constant, don't use an implicitly scaled GEP, because - // that would require a multiply inside the loop. - if (!isa(StepV)) - GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), - GEPPtrTy->getAddressSpace()); - const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; - IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType()); - rememberInstruction(IncV); - } - } else { - IncV = isNegative ? - Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : - Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); - rememberInstruction(IncV); - } + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + PN->addIncoming(IncV, Pred); } @@ -1057,6 +1144,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (SaveInsertBB) restoreInsertPoint(SaveInsertBB, SaveInsertPt); + // After expanding subexpressions, restore the PostIncLoops set so the caller + // can ensure that IVIncrement dominates the current uses. + PostIncLoops = SavedPostIncLoops; + // Remember this PHI, even in post-inc mode. 
InsertedValues.insert(PN); @@ -1124,10 +1215,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - assert((!isa(Result) || - SE.DT->dominates(cast(Result), - Builder.GetInsertPoint())) && - "postinc expansion does not dominate use"); + if (isa(Result) + && !SE.DT->dominates(cast(Result), + Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outide whose operand is replaced during + // expansion with the value of the postinc user. Without fundamentally + // changing the way postinc users are tracked, the only remedy is + // inserting an extra IV increment. StepV might fold into PostLoopOffset, + // but hopefully expandCodeFor handles that. + bool useSubtract = + !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + // Restore the insertion point to the place where the caller has + // determined dominates all uses. + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } } // Re-apply any non-loop-dominating scale. @@ -1363,10 +1475,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *I) { - BasicBlock::iterator IP = I; - while (isInsertedInstruction(IP) || isa(IP)) - ++IP; + Instruction *IP) { Builder.SetInsertPoint(IP->getParent(), IP); return expandCodeFor(SH, Ty); } @@ -1392,14 +1501,23 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!L) break; if (BasicBlock *Preheader = L->getLoopPreheader()) InsertPt = Preheader->getTerminator(); + else { + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = L->getHeader()->getFirstInsertionPt(); + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstInsertionPt(); - while (isInsertedInstruction(InsertPt) || isa(InsertPt)) + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa(InsertPt))) { InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + } break; } @@ -1434,23 +1552,9 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedPostIncValues.insert(I); else InsertedValues.insert(I); - - // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that - // subsequently inserted code will be dominated. 
- if (Builder.GetInsertPoint() == I) { - BasicBlock::iterator It = cast(I); - do { ++It; } while (isInsertedInstruction(It) || - isa(It)); - Builder.SetInsertPoint(Builder.GetInsertBlock(), It); - } } void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - // If we acquired more instructions since the old insert point was saved, - // advance past them. - while (isInsertedInstruction(I) || isa(I)) ++I; - Builder.SetInsertPoint(BB, I); } @@ -1478,40 +1582,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, return V; } -/// hoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -/// -/// This does not require a SCEVExpander instance and could be replaced by a -/// general code-insertion helper. -bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) { - if (DT->dominates(IncV, InsertPos)) - return true; - - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; - - if (IncV->mayHaveSideEffects()) - return false; - - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast(OI); - if (OInst && !DT->dominates(OInst, InsertPos)) - return false; - } - IncV->moveBefore(InsertPos); - return true; +/// Sort values by integer width for replaceCongruentIVs. +static bool width_descending(Value *lhs, Value *rhs) { + // Put pointers at the back and make sure pointer < pointer = false. + if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) + return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); + return rhs->getType()->getPrimitiveSizeInBits() + < lhs->getType()->getPrimitiveSizeInBits(); } /// replaceCongruentIVs - Check for congruent phis in this loop header and @@ -1521,23 +1598,45 @@ bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl &DeadInsts) { + SmallVectorImpl &DeadInsts, + const TargetLowering *TLI) { + // Find integer phis in order of increasing width. + SmallVector Phis; + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *Phi = dyn_cast(I); ++I) { + Phis.push_back(Phi); + } + if (TLI) + std::sort(Phis.begin(), Phis.end(), width_descending); + unsigned NumElim = 0; DenseMap ExprToIVMap; - for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - PHINode *Phi = cast(I); + // Process phis from wide to narrow. Mapping wide phis to the their truncation + // so narrow phis can reuse them. 
+ for (SmallVectorImpl::const_iterator PIter = Phis.begin(), + PEnd = Phis.end(); PIter != PEnd; ++PIter) { + PHINode *Phi = *PIter; + if (!SE.isSCEVable(Phi->getType())) continue; PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; + if (Phi->getType()->isIntegerTy() && TLI + && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + // This phi can be freely truncated to the narrowest phi type. Map the + // truncated expression to it so it will be reused for narrow types. + const SCEV *TruncExpr = + SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); + ExprToIVMap[TruncExpr] = Phi; + } continue; } - // If one phi derives from the other via GEPs, types may differ. - // We could consider adding a bitcast here to handle it. - if (OrigPhiRef->getType() != Phi->getType()) + // Replacing a pointer phi with an integer phi or vice-versa doesn't make + // sense. + if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) continue; if (BasicBlock *LatchBlock = L->getLoopLatch()) { @@ -1546,32 +1645,56 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, Instruction *IsomorphicInc = cast(Phi->getIncomingValueForBlock(LatchBlock)); - // If this phi is more canonical, swap it with the original. - if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) - && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { + // If this phi has the same width but is more canonical, replace the + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. + if (OrigPhiRef->getType() == Phi->getType() + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { std::swap(OrigPhiRef, Phi); std::swap(OrigInc, IsomorphicInc); } // Replacing the congruent phi is sufficient because acyclic redundancy // elimination, CSE/GVN, should handle the rest. However, once SCEV proves // that a phi is congruent, it's often the head of an IV user cycle that - // is isomorphic with the original phi. So it's worth eagerly cleaning up - // the common case of a single IV increment. - if (OrigInc != IsomorphicInc && - OrigInc->getType() == IsomorphicInc->getType() && - SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) && - hoistStep(OrigInc, IsomorphicInc, DT)) { + // is isomorphic with the original phi. It's worth eagerly cleaning up the + // common case of a single IV increment so that DeleteDeadPHIs can remove + // cycles that had postinc uses. + const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc), + IsomorphicInc->getType()); + if (OrigInc != IsomorphicInc + && TruncExpr == SE.getSCEV(IsomorphicInc) + && ((isa(OrigInc) && isa(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); - IsomorphicInc->replaceAllUsesWith(OrigInc); + Value *NewInc = OrigInc; + if (OrigInc->getType() != IsomorphicInc->getType()) { + Instruction *IP = isa(OrigInc) + ? (Instruction*)L->getHeader()->getFirstInsertionPt() + : OrigInc->getNextNode(); + IRBuilder<> Builder(IP); + Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); + NewInc = Builder. 
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); + } + IsomorphicInc->replaceAllUsesWith(NewInc); DeadInsts.push_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); ++NumElim; - Phi->replaceAllUsesWith(OrigPhiRef); + Value *NewIV = OrigPhiRef; + if (OrigPhiRef->getType() != Phi->getType()) { + IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); + NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); + } + Phi->replaceAllUsesWith(NewIV); DeadInsts.push_back(Phi); } return NumElim; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index c66ecd6e8727..dd2ed4ff831c 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -118,7 +118,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // Conservatively use AnyWrap until/unless we need FlagNW. const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); switch (Kind) { - default: llvm_unreachable("Unexpected transform name!"); case NormalizeAutodetect: if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { const SCEV *TransformedStep = @@ -191,7 +190,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { } llvm_unreachable("Unexpected SCEV kind!"); - return 0; } /// Manage recursive transformation across an expression DAG. Revisiting diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp index d8c207b4bd49..c819666ee444 100644 --- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -194,8 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, Succs.assign(TI.getNumSuccessors(), true); return; } - - Succs[SI.findCaseValue(cast(C))] = true; + SwitchInst::CaseIt Case = SI.findCaseValue(cast(C)); + Succs[Case.getSuccessorIndex()] = true; } @@ -327,13 +327,13 @@ void SparseSolver::Solve(Function &F) { } void SparseSolver::Print(Function &F, raw_ostream &OS) const { - OS << "\nFUNCTION: " << F.getNameStr() << "\n"; + OS << "\nFUNCTION: " << F.getName() << "\n"; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!BBExecutable.count(BB)) OS << "INFEASIBLE: "; OS << "\t"; if (BB->hasName()) - OS << BB->getNameStr() << ":\n"; + OS << BB->getName() << ":\n"; else OS << "; anon bb\n"; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp index 68a39cd581f4..ff5010bad7bb 100644 --- a/contrib/llvm/lib/Analysis/Trace.cpp +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -34,7 +34,7 @@ Module *Trace::getModule() const { /// void Trace::print(raw_ostream &O) const { Function *F = getFunction(); - O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; + O << "; Trace from function " << F->getName() << ", blocks:\n"; for (const_iterator i = begin(), e = end(); i != e; ++i) { O << "; "; WriteAsOperand(O, *i, true, getModule()); diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 4d94f619fda1..a430f6281ef0 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -20,8 +20,10 @@ #include 
"llvm/GlobalAlias.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" @@ -41,10 +43,176 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { return TD ? TD->getPointerSizeInBits() : 0; } -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bit sets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. +static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const TargetData *TD, unsigned Depth) { + if (!Add) { + if (ConstantInt *CLHS = dyn_cast(Op0)) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (!CLHS->getValue().isNegative()) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); + } + } + } + } + + unsigned BitWidth = KnownZero.getBitWidth(); + + // If one of the operands has trailing zeros, then the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. For a subtract, + // this only works if the known zeros are in the right operand. + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. + if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. + KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + + // Are we still trying to solve for the sign bit? 
+ if (!KnownZero.isNegative() && !KnownOne.isNegative()) { + if (NSW) { + if (Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. + else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } +} + +static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const TargetData *TD, unsigned Depth) { + unsigned BitWidth = KnownZero.getBitWidth(); + ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + bool isKnownNegative = false; + bool isKnownNonNegative = false; + // If the multiplication is known not to overflow, compute the sign bit. + if (NSW) { + if (Op0 == Op1) { + // The product of a number with itself is non-negative. + isKnownNonNegative = true; + } else { + bool isKnownNonNegativeOp1 = KnownZero.isNegative(); + bool isKnownNonNegativeOp0 = KnownZero2.isNegative(); + bool isKnownNegativeOp1 = KnownOne.isNegative(); + bool isKnownNegativeOp0 = KnownOne2.isNegative(); + // The product of two numbers with the same sign is non-negative. + isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + // The product of a negative number and a non-negative number is either + // negative or zero. + if (!isKnownNonNegative) + isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && + isKnownNonZero(Op0, TD, Depth)) || + (isKnownNegativeOp0 && isKnownNonNegativeOp1 && + isKnownNonZero(Op1, TD, Depth)); + } + } + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clearAllBits(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + + // Only make use of no-wrap flags if we failed to compute the sign bit + // directly. This matters if the multiplication always overflows, in + // which case we prefer to follow the result of the direct computation, + // though as the program is invoking undefined behaviour we can choose + // whatever we like here. 
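
The trailing-zero bound computed above (TrailZ = tz(LHS) + tz(RHS), capped at the bit width) can be sanity-checked with ordinary integers. A self-contained check in plain standard C++; tz is a local helper written for the example, not an LLVM API:

#include <cassert>
#include <cstdint>

// Count trailing zero bits; returns 32 for zero.
static unsigned tz(uint32_t x) {
  unsigned n = 0;
  while (n < 32 && !(x & (1u << n)))
    ++n;
  return n;
}

int main() {
  uint32_t a = 12, b = 24;                  // ...1100 and ...11000
  unsigned bound = tz(a) + tz(b);           // 2 + 3 = 5
  assert(tz(a * b) >= bound);               // 288 = ...100100000 has 5
  return 0;
}
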
+ if (isKnownNonNegative && !KnownOne.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (isKnownNegative && !KnownZero.isNegative()) + KnownOne.setBit(BitWidth - 1); +} + +void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NumRanges = Ranges.getNumOperands() / 2; + assert(NumRanges >= 1); + + // Use the high end of the ranges to find leading zeros. + unsigned MinLeadingZeros = BitWidth; + for (unsigned i = 0; i < NumRanges; ++i) { + ConstantInt *Lower = cast(Ranges.getOperand(2*i + 0)); + ConstantInt *Upper = cast(Ranges.getOperand(2*i + 1)); + ConstantRange Range(Lower->getValue(), Upper->getValue()); + if (Range.isWrappedSet()) + MinLeadingZeros = 0; // -1 has no zeros + unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros(); + MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros); + } + + KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); +} +/// ComputeMaskedBits - Determine which of the bits are known to be either zero +/// or one and return them in the KnownZero/KnownOne bit sets. +/// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing /// it to be an explicit zero. If we don't change it to zero, other code could @@ -54,67 +222,75 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { /// /// This function is defined on values with integer type, values with pointer /// type (but only if TD is non-null), and vectors of integers. In the case -/// where V is a vector, the mask, known zero, and known one values are the +/// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, +void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, const TargetData *TD, unsigned Depth) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) - && "Not integer or pointer type!"); + unsigned BitWidth = KnownZero.getBitWidth(); + + assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarType()->isPointerTy()) && + "Not integer or pointer type!"); assert((!TD || TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && + KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && "V, Mask, KnownOne and KnownZero should have same BitWidth"); if (ConstantInt *CI = dyn_cast(V)) { // We know all of the bits for a constant! - KnownOne = CI->getValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = CI->getValue(); + KnownZero = ~KnownOne; return; } // Null and aggregate-zero are all-zeros. if (isa(V) || isa(V)) { KnownOne.clearAllBits(); - KnownZero = Mask; + KnownZero = APInt::getAllOnesValue(BitWidth); return; } // Handle a constant vector by taking the intersection of the known bits of - // each element. - if (ConstantVector *CV = dyn_cast(V)) { + // each element. There is no real need to handle ConstantVector here, because + // we don't handle undef in any particularly useful way. 
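
The per-element intersection described in the comment above (and spelled out on APInt in the code that follows) boils down to: a bit is known one only if it is set in every element, and known zero only if it is clear in every element. The same idea on plain unsigned values, as a small illustrative check:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t elts[] = {0x0F, 0x07};
  uint8_t knownOne = 0xFF, knownZero = 0xFF;
  for (unsigned i = 0; i != sizeof(elts); ++i) {
    knownOne &= elts[i];                    // set in every element
    knownZero &= (uint8_t)~elts[i];         // clear in every element
  }
  assert(knownOne == 0x07 && knownZero == 0xF0);
  return 0;
}
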
+ if (ConstantDataSequential *CDS = dyn_cast(V)) { + // We know that CDS must be a vector of integers. Take the intersection of + // each element. KnownZero.setAllBits(); KnownOne.setAllBits(); - for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, - TD, Depth); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + APInt Elt(KnownZero.getBitWidth(), 0); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + Elt = CDS->getElementAsInteger(i); + KnownZero &= ~Elt; + KnownOne &= Elt; } return; } + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); - if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { - Type *ObjectType = GV->getType()->getElementType(); - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (!GV->isDeclaration() && !GV->mayBeOverridden()) - Align = TD->getPrefTypeAlignment(ObjectType); - else - Align = TD->getABITypeAlignment(ObjectType); + if (Align == 0 && TD) { + if (GlobalVariable *GVar = dyn_cast(GV)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + Align = TD->getPreferredAlignment(GVar); + else + Align = TD->getABITypeAlignment(ObjectType); + } + } } if (Align > 0) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); else KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -126,8 +302,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (GA->mayBeOverridden()) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } else { - ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, - TD, Depth+1); + ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); } return; } @@ -136,15 +311,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Get alignment information off byval arguments if specified in the IR. if (A->hasByValAttr()) if (unsigned Align = A->getParamAlignment()) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); return; } // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (Depth == MaxDepth || Mask == 0) + if (Depth == MaxDepth) return; // Limit search depth. Operator *I = dyn_cast(V); @@ -153,12 +328,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt KnownZero2(KnownZero), KnownOne2(KnownOne); switch (I->getOpcode()) { default: break; + case Instruction::Load: + if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) + computeMaskedBitsLoad(*MD, KnownZero); + return; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. 
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - APInt Mask2(Mask & ~KnownZero); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -169,10 +346,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Or: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - APInt Mask2(Mask & ~KnownOne); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -183,9 +358,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -197,55 +371,32 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Mul: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), - BitWidth) - BitWidth; - - TrailZ = std::min(TrailZ, BitWidth); - LeadZ = std::min(LeadZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | - APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; - return; + bool NSW = cast(I)->hasNoSignedWrap(); + ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), - AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(I->getOperand(1), - AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -278,11 +429,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, else SrcBitWidth = SrcTy->getScalarSizeInBits(); - APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. @@ -296,8 +445,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); return; } break; @@ -306,11 +454,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt MaskIn = Mask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -327,9 +473,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - APInt Mask2(Mask.lshr(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; @@ -344,9 +488,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. 
- APInt Mask2(Mask.shl(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -362,9 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - APInt Mask2(Mask.shl(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -378,100 +518,25 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } break; case Instruction::Sub: { - if (ConstantInt *CLHS = dyn_cast(I->getOperand(0))) { - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - if (!CLHS->getValue().isNegative()) { - unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, - TD, Depth+1); - - // If all of the MaskV bits are known to be zero, then we know the - // output top bits are zero, because we now know that the output is - // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); - // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - } - } - } + bool NSW = cast(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); + break; } - // fall through case Instruction::Add: { - // If one of the operands has trailing zeros, then the bits that the - // other operand has in those bit positions will be preserved in the - // result. For an add, this works with either operand. For a subtract, - // this only works if the known zeros are in the right operand. - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, - Depth+1); - assert((LHSKnownZero & LHSKnownOne) == 0 && - "Bits known to be one AND zero?"); - unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - - ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); - - // Determine which operand has more trailing zeros, and use that - // many bits from the other operand. - if (LHSKnownZeroOut > RHSKnownZeroOut) { - if (I->getOpcode() == Instruction::Add) { - APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); - KnownZero |= KnownZero2 & Mask; - KnownOne |= KnownOne2 & Mask; - } else { - // If the known zeros are in the left operand for a subtract, - // fall back to the minimum known zeros in both operands. 
- KnownZero |= APInt::getLowBitsSet(BitWidth, - std::min(LHSKnownZeroOut, - RHSKnownZeroOut)); - } - } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { - APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); - KnownZero |= LHSKnownZero & Mask; - KnownOne |= LHSKnownOne & Mask; - } - - // Are we still trying to solve for the sign bit? - if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){ - OverflowingBinaryOperator *OBO = cast(I); - if (OBO->hasNoSignedWrap()) { - if (I->getOpcode() == Instruction::Add) { - // Adding two positive numbers can't wrap into negative - if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // and adding two negative numbers can't wrap into positive. - else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } else { - // Subtracting a negative number from a positive one can't wrap - if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // neither can subtracting a positive number from a negative one. - else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } - } - } - - return; + bool NSW = cast(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); + break; } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -487,19 +552,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (Mask.isNegative() && KnownZero.isNonNegative()) { - APInt Mask2 = APInt::getSignBit(BitWidth); + if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) @@ -512,27 +573,24 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~LowBits; + KnownOne &= LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. 
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, - TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, - TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } @@ -543,17 +601,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); break; } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - APInt LocalMask = APInt::getAllOnesValue(BitWidth); APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LocalMask, - LocalKnownZero, LocalKnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -573,17 +629,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (!IndexedTy->isSized()) return; unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; - LocalMask = APInt::getAllOnesValue(GEPOpiBits); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - ComputeMaskedBits(Index, LocalMask, - LocalKnownZero, LocalKnownOne, TD, Depth+1); + ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, unsigned(CountTrailingZeros_64(TypeSize) + LocalKnownZero.countTrailingOnes())); } } - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask; + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); break; } case Instruction::PHI: { @@ -618,17 +672,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1); - Mask2 = APInt::getLowBitsSet(BitWidth, - KnownZero2.countTrailingOnes()); + ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1); + ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); - KnownZero = Mask & - APInt::getLowBitsSet(BitWidth, + KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), KnownZero3.countTrailingOnes())); break; @@ -657,8 +707,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. 
- ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne, - KnownZero2, KnownOne2, TD, MaxDepth-1); + ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -673,9 +723,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; - case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + // If this call is undefined for 0, the result will be less than 2^n. + if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) + LowBits -= 1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; @@ -687,6 +744,34 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } } break; + case Instruction::ExtractValue: + if (IntrinsicInst *II = dyn_cast(I->getOperand(0))) { + ExtractValueInst *EVI = cast(I); + if (EVI->getNumIndices() != 1) break; + if (EVI->getIndices()[0] == 0) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + ComputeMaskedBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + ComputeMaskedBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); + break; + } + } + } } } @@ -702,8 +787,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD, - Depth); + ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -712,10 +796,15 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { - if (ConstantInt *CI = dyn_cast(V)) - return CI->getValue().isPowerOf2(); - // TODO: Handle vector constants. +bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, + unsigned Depth) { + if (Constant *C = dyn_cast(V)) { + if (C->isNullValue()) + return OrZero; + if (ConstantInt *CI = dyn_cast(C)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + } // 1 << X is clearly a power of two if the one is not shifted off the end. If // it is shifted off the end then the result is undefined. @@ -731,21 +820,36 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { if (Depth++ == MaxDepth) return false; + Value *X = 0, *Y = 0; + // A shift of a power of two is a power of two or zero. 
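
One of the new isPowerOfTwo cases a few lines below relies on the identity that x & -x isolates the lowest set bit, so the result is always a power of two or zero. A quick standalone check in plain C++; isPow2OrZero is a local helper written for the example:

#include <cassert>
#include <cstdint>

static bool isPow2OrZero(uint32_t v) { return (v & (v - 1)) == 0; }

int main() {
  const uint32_t tests[] = {0u, 1u, 6u, 40u, 0x80000000u};
  for (unsigned i = 0; i != 5; ++i) {
    uint32_t x = tests[i];
    assert(isPow2OrZero(x & (0u - x)));   // lowest set bit, or zero when x == 0
  }
  return 0;
}
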
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || + match(V, m_Shr(m_Value(X), m_Value())))) + return isPowerOfTwo(X, TD, /*OrZero*/true, Depth); + if (ZExtInst *ZI = dyn_cast(V)) - return isPowerOfTwo(ZI->getOperand(0), TD, Depth); + return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth); if (SelectInst *SI = dyn_cast(V)) - return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && - isPowerOfTwo(SI->getFalseValue(), TD, Depth); + return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) && + isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth); + + if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { + // A power of two and'd with anything is a power of two or zero. + if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) || + isPowerOfTwo(Y, TD, /*OrZero*/true, Depth)) + return true; + // X & (-X) is always a power of two or zero. + if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) + return true; + return false; + } // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). - if (match(V, m_LShr(m_Value(), m_Value())) || - match(V, m_UDiv(m_Value(), m_Value()))) { - PossiblyExactOperator *PEO = cast(V); - if (PEO->isExact()) - return isPowerOfTwo(PEO->getOperand(0), TD, Depth); + if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || + match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { + return isPowerOfTwo(cast(V)->getOperand(0), TD, OrZero, Depth); } return false; @@ -767,7 +871,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxDepth) + if (Depth++ >= MaxDepth) return false; unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -785,13 +889,13 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // if the lowest bit is shifted off the end. if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { // shl nuw can't remove any non-zero bits. - BinaryOperator *BO = cast(V); + OverflowingBinaryOperator *BO = cast(V); if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, TD, Depth); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); if (KnownOne[0]) return true; } @@ -799,7 +903,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { // shr exact can only shift out zero bits. - BinaryOperator *BO = cast(V); + PossiblyExactOperator *BO = cast(V); if (BO->isExact()) return isKnownNonZero(X, TD, Depth); @@ -809,10 +913,8 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { return true; } // div exact can only produce a zero if the dividend is zero. - else if (match(V, m_IDiv(m_Value(X), m_Value()))) { - BinaryOperator *BO = cast(V); - if (BO->isExact()) - return isKnownNonZero(X, TD, Depth); + else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { + return isKnownNonZero(X, TD, Depth); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { @@ -835,20 +937,29 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. 
- ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth)) + if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth)) return true; - if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth)) + if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) + return true; + } + // X * Y. + else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *BO = cast(V); + // If X and Y are non-zero then so is X * Y as long as the multiplication + // does not overflow. + if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && + isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. @@ -861,8 +972,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, - TD, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); return KnownOne != 0; } @@ -878,7 +988,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const TargetData *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -917,30 +1027,28 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; - case Instruction::AShr: + case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); - // ashr X, C -> adds C sign bits. - if (ConstantInt *C = dyn_cast(U->getOperand(1))) { - Tmp += C->getZExtValue(); + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + Tmp += ShAmt->getZExtValue(); if (Tmp > TyBits) Tmp = TyBits; } - // vector ashr X, -> adds C sign bits - if (ConstantVector *C = dyn_cast(U->getOperand(1))) { - if (ConstantInt *CI = dyn_cast_or_null(C->getSplatValue())) { - Tmp += CI->getZExtValue(); - if (Tmp > TyBits) Tmp = TyBits; - } - } return Tmp; - case Instruction::Shl: - if (ConstantInt *C = dyn_cast(U->getOperand(1))) { + } + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); - if (C->getZExtValue() >= TyBits || // Bad shift. - C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. - return Tmp - C->getZExtValue(); + Tmp2 = ShAmt->getZExtValue(); + if (Tmp2 >= TyBits || // Bad shift. + Tmp2 >= Tmp) break; // Shifted all sign bits out. + return Tmp - Tmp2; } break; + } case Instruction::And: case Instruction::Or: case Instruction::Xor: // NOT is handled here. 
@@ -971,13 +1079,11 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, if (ConstantInt *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)) == Mask) + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry @@ -998,12 +1104,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, if (ConstantInt *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, - TD, Depth+1); + ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)) == Mask) + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), @@ -1045,8 +1149,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + APInt Mask; + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1282,23 +1386,21 @@ Value *llvm::isBytewiseValue(Value *V) { } } - // A ConstantArray is splatable if all its members are equal and also - // splatable. - if (ConstantArray *CA = dyn_cast(V)) { - if (CA->getNumOperands() == 0) - return 0; - - Value *Val = isBytewiseValue(CA->getOperand(0)); + // A ConstantDataArray/Vector is splatable if all its members are equal and + // also splatable. + if (ConstantDataSequential *CA = dyn_cast(V)) { + Value *Elt = CA->getElementAsConstant(0); + Value *Val = isBytewiseValue(Elt); if (!Val) return 0; - for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I) - if (CA->getOperand(I-1) != CA->getOperand(I)) + for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) + if (CA->getElementAsConstant(I) != Elt) return 0; return Val; } - + // Conceptually, we could handle things like: // %a = zext i8 %X to i16 // %b = shl i16 %a, 8 @@ -1395,50 +1497,44 @@ static Value *BuildSubAggregate(Value *From, ArrayRef idx_range, Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, Instruction *InsertBefore) { // Nothing to index? Just return V then (this is useful at the end of our - // recursion) + // recursion). if (idx_range.empty()) return V; - // We have indices, so V should have an indexable type - assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) - && "Not looking at a struct or array?"); - assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) - && "Invalid indices for type?"); - CompositeType *PTy = cast(V->getType()); + // We have indices, so V should have an indexable type. 
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && + "Not looking at a struct or array?"); + assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) && + "Invalid indices for type?"); - if (isa(V)) - return UndefValue::get(ExtractValueInst::getIndexedType(PTy, - idx_range)); - else if (isa(V)) - return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, - idx_range)); - else if (Constant *C = dyn_cast(V)) { - if (isa(C) || isa(C)) - // Recursively process this constant - return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1), - InsertBefore); - } else if (InsertValueInst *I = dyn_cast(V)) { + if (Constant *C = dyn_cast(V)) { + C = C->getAggregateElement(idx_range[0]); + if (C == 0) return 0; + return FindInsertedValue(C, idx_range.slice(1), InsertBefore); + } + + if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices const unsigned *req_idx = idx_range.begin(); for (const unsigned *i = I->idx_begin(), *e = I->idx_end(); i != e; ++i, ++req_idx) { if (req_idx == idx_range.end()) { - if (InsertBefore) - // The requested index identifies a part of a nested aggregate. Handle - // this specially. For example, - // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 - // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 - // %C = extractvalue {i32, { i32, i32 } } %B, 1 - // This can be changed into - // %A = insertvalue {i32, i32 } undef, i32 10, 0 - // %C = insertvalue {i32, i32 } %A, i32 11, 1 - // which allows the unused 0,0 element from the nested struct to be - // removed. - return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), - InsertBefore); - else - // We can't handle this without inserting insertvalues + // We can't handle this without inserting insertvalues + if (!InsertBefore) return 0; + + // The requested index identifies a part of a nested aggregate. Handle + // this specially. For example, + // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0 + // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1 + // %C = extractvalue {i32, { i32, i32 } } %B, 1 + // This can be changed into + // %A = insertvalue {i32, i32 } undef, i32 10, 0 + // %C = insertvalue {i32, i32 } %A, i32 11, 1 + // which allows the unused 0,0 element from the nested struct to be + // removed. + return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), + InsertBefore); } // This insert value inserts something else than what we are looking for. @@ -1454,7 +1550,9 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, return FindInsertedValue(I->getInsertedValueOperand(), makeArrayRef(req_idx, idx_range.end()), InsertBefore); - } else if (ExtractValueInst *I = dyn_cast(V)) { + } + + if (ExtractValueInst *I = dyn_cast(V)) { // If we're extracting a value from an aggregrate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. @@ -1486,7 +1584,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const TargetData &TD) { Operator *PtrOp = dyn_cast(Ptr); - if (PtrOp == 0) return Ptr; + if (PtrOp == 0 || Ptr->getType()->isVectorTy()) + return Ptr; // Just look through bitcasts. 
if (PtrOp->getOpcode() == Instruction::BitCast) @@ -1521,34 +1620,19 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } -/// GetConstantStringInfo - This function computes the length of a +/// getConstantStringInfo - This function computes the length of a /// null-terminated C string pointed to by V. If successful, it returns true /// and returns the string in Str. If unsuccessful, it returns false. -bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, - uint64_t Offset, - bool StopAtNul) { - // If V is NULL then return false; - if (V == NULL) return false; +bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset, bool TrimAtNul) { + assert(V); - // Look through bitcast instructions. - if (const BitCastInst *BCI = dyn_cast(V)) - return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul); + // Look through bitcast instructions and geps. + V = V->stripPointerCasts(); - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return false because ConstantArray can't occur - // any other way - const User *GEP = 0; - if (const GetElementPtrInst *GEPI = dyn_cast(V)) { - GEP = GEPI; - } else if (const ConstantExpr *CE = dyn_cast(V)) { - if (CE->getOpcode() == Instruction::BitCast) - return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul); - if (CE->getOpcode() != Instruction::GetElementPtr) - return false; - GEP = CE; - } - - if (GEP) { + // If the value is a GEP instructionor constant expression, treat it as an + // offset. + if (const GEPOperator *GEP = dyn_cast(V)) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; @@ -1573,51 +1657,48 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, StartIdx = CI->getZExtValue(); else return false; - return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset, - StopAtNul); + return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset); } - + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. - const GlobalVariable* GV = dyn_cast(V); + const GlobalVariable *GV = dyn_cast(V); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - const Constant *GlobalInit = GV->getInitializer(); - - // Handle the ConstantAggregateZero case - if (isa(GlobalInit)) { + + // Handle the all-zeros case + if (GV->getInitializer()->isNullValue()) { // This is a degenerate case. The initializer is constant zero so the // length of the string must be zero. - Str.clear(); + Str = ""; return true; } // Must be a Constant Array - const ConstantArray *Array = dyn_cast(GlobalInit); - if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) + const ConstantDataArray *Array = + dyn_cast(GV->getInitializer()); + if (Array == 0 || !Array->isString()) return false; // Get the number of elements in the array - uint64_t NumElts = Array->getType()->getNumElements(); - + uint64_t NumElts = Array->getType()->getArrayNumElements(); + + // Start out with the entire array in the StringRef. + Str = Array->getAsString(); + if (Offset > NumElts) return false; - // Traverse the constant array from 'Offset' which is the place the GEP refers - // to in the array. 
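
A hedged usage sketch for the StringRef-based getConstantStringInfo introduced above, using the LLVM 3.1-era signature shown in this patch; the wrapper name constantCString is hypothetical:

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"

// Return the constant C string behind V, trimmed at the first NUL; false if V
// is not backed by a constant, definitively initialized string.
static bool constantCString(const llvm::Value *V, llvm::StringRef &Out) {
  return llvm::getConstantStringInfo(V, Out, /*Offset=*/0, /*TrimAtNul=*/true);
}
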
- Str.reserve(NumElts-Offset); - for (unsigned i = Offset; i != NumElts; ++i) { - const Constant *Elt = Array->getOperand(i); - const ConstantInt *CI = dyn_cast(Elt); - if (!CI) // This array isn't suitable, non-int initializer. - return false; - if (StopAtNul && CI->isZero()) - return true; // we found end of string, success! - Str += (char)CI->getZExtValue(); - } + // Skip over 'offset' bytes. + Str = Str.substr(Offset); - // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. + if (TrimAtNul) { + // Trim off the \0 and anything after it. If the array is not nul + // terminated, we just return the whole end of string. The client may know + // some other way that the string is length-bound. + Str = Str.substr(0, Str.find('\0')); + } return true; } @@ -1629,8 +1710,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { // Look through noop bitcast instructions. - if (BitCastInst *BCI = dyn_cast(V)) - return GetStringLengthH(BCI->getOperand(0), PHIs); + V = V->stripPointerCasts(); // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. @@ -1666,75 +1746,13 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { if (Len1 != Len2) return 0; return Len1; } - - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return unknown. - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast(V)) { - GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast(V)) { - if (CE->getOpcode() != Instruction::GetElementPtr) - return 0; - GEP = CE; - } else { - return 0; - } - - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) + + // Otherwise, see if we can read the string. + StringRef StrData; + if (!getConstantStringInfo(V, StrData)) return 0; - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. - if (ConstantInt *Idx = dyn_cast(GEP->getOperand(1))) { - if (!Idx->isZero()) - return 0; - } else - return 0; - - // If the second index isn't a ConstantInt, then this is a variable index - // into the array. If this occurs, we can't say anything meaningful about - // the string. - uint64_t StartIdx = 0; - if (ConstantInt *CI = dyn_cast(GEP->getOperand(2))) - StartIdx = CI->getZExtValue(); - else - return 0; - - // The GEP instruction, constant or instruction, must reference a global - // variable that is a constant and is initialized. The referenced constant - // initializer is the array that we'll use for optimization. - GlobalVariable* GV = dyn_cast(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer() || - GV->mayBeOverridden()) - return 0; - Constant *GlobalInit = GV->getInitializer(); - - // Handle the ConstantAggregateZero case, which is a degenerate case. The - // initializer is constant zero so the length of the string must be zero. - if (isa(GlobalInit)) - return 1; // Len = 0 offset by 1. - - // Must be a Constant Array - ConstantArray *Array = dyn_cast(GlobalInit); - if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) - return false; - - // Get the number of elements in the array - uint64_t NumElts = Array->getType()->getNumElements(); - - // Traverse the constant array from StartIdx (derived above) which is - // the place the GEP refers to in the array. 
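The rewritten getConstantStringInfo above no longer copies bytes one at a time; it takes the whole constant data as a StringRef, skips Offset bytes, and, when TrimAtNul is set, cuts at the first '\0' while keeping everything if no terminator exists. A small sketch of that slicing with std::string (the names here are illustrative, not LLVM's):

#include <string>

// Skip 'offset' leading bytes, then optionally trim at the first NUL.
// An offset past the end models the "if (Offset > NumElts) return false;"
// failure case with an empty result.
std::string stringSuffix(const std::string &data, size_t offset, bool trimAtNul) {
  if (offset > data.size())
    return std::string();
  std::string s = data.substr(offset);
  if (trimAtNul) {
    size_t nul = s.find('\0');
    if (nul != std::string::npos)
      s.resize(nul);                   // drop the terminator and anything after it
  }
  return s;
}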
- for (unsigned i = StartIdx; i != NumElts; ++i) { - Constant *Elt = Array->getOperand(i); - ConstantInt *CI = dyn_cast(Elt); - if (!CI) // This array isn't suitable, non-int initializer. - return 0; - if (CI->isZero()) - return i-StartIdx+1; // We found end of string, success! - } - - return 0; // The array isn't null terminated, conservatively return 'unknown'. + return StrData.size()+1; } /// GetStringLength - If we can compute the length of the string pointed to by @@ -1793,3 +1811,94 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { } return true; } + +bool llvm::isSafeToSpeculativelyExecute(const Value *V, + const TargetData *TD) { + const Operator *Inst = dyn_cast(V); + if (!Inst) + return false; + + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Constant *C = dyn_cast(Inst->getOperand(i))) + if (C->canTrap()) + return false; + + switch (Inst->getOpcode()) { + default: + return true; + case Instruction::UDiv: + case Instruction::URem: + // x / y is undefined if y == 0, but calcuations like x / 3 are safe. + return isKnownNonZero(Inst->getOperand(1), TD); + case Instruction::SDiv: + case Instruction::SRem: { + Value *Op = Inst->getOperand(1); + // x / y is undefined if y == 0 + if (!isKnownNonZero(Op, TD)) + return false; + // x / y might be undefined if y == -1 + unsigned BitWidth = getBitWidth(Op->getType(), TD); + if (BitWidth == 0) + return false; + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(Op, KnownZero, KnownOne, TD); + return !!KnownZero; + } + case Instruction::Load: { + const LoadInst *LI = cast(Inst); + if (!LI->isUnordered()) + return false; + return LI->getPointerOperand()->isDereferenceablePointer(); + } + case Instruction::Call: { + if (const IntrinsicInst *II = dyn_cast(Inst)) { + switch (II->getIntrinsicID()) { + // These synthetic intrinsics have no side-effects, and just mark + // information about their operands. + // FIXME: There are other no-op synthetic instructions that potentially + // should be considered at least *safe* to speculate... + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return true; + + case Intrinsic::bswap: + case Intrinsic::ctlz: + case Intrinsic::ctpop: + case Intrinsic::cttz: + case Intrinsic::objectsize: + case Intrinsic::sadd_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::usub_with_overflow: + return true; + // TODO: some fp intrinsics are marked as having the same error handling + // as libm. They're safe to speculate when they won't error. + // TODO: are convert_{from,to}_fp16 safe? + // TODO: can we list target-specific intrinsics here? + default: break; + } + } + return false; // The called function could have undefined behavior or + // side-effects, even if marked readnone nounwind. 
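In the new isSafeToSpeculativelyExecute above, sdiv/srem may only be hoisted when the divisor is provably non-zero and provably not -1; the ComputeMaskedBits call proves the latter by finding at least one bit known to be zero, since -1 has all bits set. For a constant divisor the whole check collapses to:

#include <cstdint>

// Signed division is safe to execute speculatively only when the divisor
// can neither be 0 (undefined) nor -1 (INT_MIN / -1 overflows).
bool safeToSpeculateSDivByConstant(int64_t divisor) {
  return divisor != 0 && divisor != -1;
}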
+ } + case Instruction::VAArg: + case Instruction::Alloca: + case Instruction::Invoke: + case Instruction::PHI: + case Instruction::Store: + case Instruction::Ret: + case Instruction::Br: + case Instruction::IndirectBr: + case Instruction::Switch: + case Instruction::Unreachable: + case Instruction::Fence: + case Instruction::LandingPad: + case Instruction::AtomicRMW: + case Instruction::AtomicCmpXchg: + case Instruction::Resume: + return false; // Misc instructions which have effects + } +} diff --git a/contrib/llvm/lib/Archive/ArchiveReader.cpp b/contrib/llvm/lib/Archive/ArchiveReader.cpp index eef6fe0b1c1d..68873e2768d3 100644 --- a/contrib/llvm/lib/Archive/ArchiveReader.cpp +++ b/contrib/llvm/lib/Archive/ArchiveReader.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "ArchiveInternals.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Module.h" +#include #include #include using namespace llvm; @@ -504,7 +506,7 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, // Modules that define those symbols. bool Archive::findModulesDefiningSymbols(std::set& symbols, - std::set& result, + SmallVectorImpl& result, std::string* error) { if (!mapfile || !base) { if (error) @@ -569,21 +571,26 @@ Archive::findModulesDefiningSymbols(std::set& symbols, // At this point we have a valid symbol table (one way or another) so we // just use it to quickly find the symbols requested. + SmallPtrSet Added; for (std::set::iterator I=symbols.begin(), - E=symbols.end(); I != E;) { + Next = I, + E=symbols.end(); I != E; I = Next) { + // Increment Next before we invalidate it. + ++Next; + // See if this symbol exists Module* m = findModuleDefiningSymbol(*I,error); - if (m) { - // The symbol exists, insert the Module into our result, duplicates will - // be ignored. - result.insert(m); + if (!m) + continue; + bool NewMember = Added.insert(m); + if (!NewMember) + continue; - // Remove the symbol now that its been resolved, being careful to - // post-increment the iterator. - symbols.erase(I++); - } else { - ++I; - } + // The symbol exists, insert the Module into our result. + result.push_back(m); + + // Remove the symbol now that its been resolved. 
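The Archive::findModulesDefiningSymbols loop above switches from erasing with a post-incremented iterator to keeping a separate Next iterator that is advanced before the body may call erase(), and from a std::set result to a vector guarded by a SmallPtrSet so modules come out deduplicated in a stable order. A standalone sketch of the same pattern over ordinary standard containers; the symbol-to-module map is a stand-in for findModuleDefiningSymbol:

#include <map>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

void resolveSymbols(std::set<std::string> &symbols,
                    const std::map<std::string, std::string> &definingModule,
                    std::vector<std::string> &result) {
  std::unordered_set<std::string> added;   // plays the role of the SmallPtrSet
  for (auto i = symbols.begin(), next = i, e = symbols.end(); i != e; i = next) {
    ++next;                                // advance before erase(i) invalidates i
    auto d = definingModule.find(*i);
    if (d == definingModule.end())
      continue;                            // unresolved symbol, keep it
    if (!added.insert(d->second).second)
      continue;                            // module already in the result
    result.push_back(d->second);           // vector preserves insertion order
    symbols.erase(i);                      // symbol resolved, drop it
  }
}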
+ symbols.erase(I); } return true; } diff --git a/contrib/llvm/lib/Archive/ArchiveWriter.cpp b/contrib/llvm/lib/Archive/ArchiveWriter.cpp index 8fcc7aa29cc8..9ef29432ddf2 100644 --- a/contrib/llvm/lib/Archive/ArchiveWriter.cpp +++ b/contrib/llvm/lib/Archive/ArchiveWriter.cpp @@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where, if (hasSlash || filePath.str().length() > 15) flags |= ArchiveMember::HasLongFilenameFlag; - sys::LLVMFileType type; + sys::fs::file_magic type; if (sys::fs::identify_magic(mbr->path.str(), type)) - type = sys::Unknown_FileType; + type = sys::fs::file_magic::unknown; switch (type) { - case sys::Bitcode_FileType: + case sys::fs::file_magic::bitcode: flags |= ArchiveMember::BitcodeFlag; break; default: diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index d0dd98627bab..8818168f643d 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -29,7 +29,7 @@ using namespace llvm; bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { - ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error"); + ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg); return true; } @@ -55,18 +55,22 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { return Result; } +static char parseHexChar(char C) { + if (C >= '0' && C <= '9') + return C-'0'; + if (C >= 'A' && C <= 'F') + return C-'A'+10; + if (C >= 'a' && C <= 'f') + return C-'a'+10; + return 0; +} + uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; ++Buffer) { uint64_t OldRes = Result; Result *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Result += C-'0'; - else if (C >= 'A' && C <= 'F') - Result += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Result += C-'a'+10; + Result += parseHexChar(*Buffer); if (Result < OldRes) { // Uh, oh, overflow detected!!! 
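The repeated digit-decoding blocks in LLLexer are factored into the parseHexChar helper shown above; HexIntToVal then accumulates base-16 digits and flags wrap-around by comparing against the previous value. A self-contained sketch of that pair:

#include <cstdint>

// Map one hex digit to its value; non-hex input maps to 0, as in parseHexChar.
static unsigned hexDigit(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  return 0;
}

// Accumulate the digits in [begin, end); report failure when the running
// value wraps, the same "Result < OldRes" heuristic HexIntToVal uses.
bool hexToUint64(const char *begin, const char *end, uint64_t &out) {
  out = 0;
  for (; begin != end; ++begin) {
    uint64_t old = out;
    out = out * 16 + hexDigit(*begin);
    if (out < old)
      return false;                  // constant no longer fits in 64 bits
  }
  return true;
}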
Error("constant bigger than 64 bits detected!"); @@ -82,24 +86,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<16; i++, Buffer++) { assert(Buffer != End); Pair[0] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[0] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[0] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[0] += C-'a'+10; + Pair[0] += parseHexChar(*Buffer); } Pair[1] = 0; for (int i=0; i<16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[1] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[1] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[1] += C-'a'+10; + Pair[1] += parseHexChar(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -113,24 +105,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<4 && Buffer != End; i++, Buffer++) { assert(Buffer != End); Pair[1] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[1] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[1] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[1] += C-'a'+10; + Pair[1] += parseHexChar(*Buffer); } Pair[0] = 0; for (int i=0; i<16; i++, Buffer++) { Pair[0] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[0] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[0] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[0] += C-'a'+10; + Pair[0] += parseHexChar(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -149,9 +129,7 @@ static void UnEscapeLexed(std::string &Str) { *BOut++ = '\\'; // Two \ becomes one BIn += 2; } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { - char Tmp = BIn[3]; BIn[3] = 0; // Terminate string - *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number - BIn[3] = Tmp; // Restore character + *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]); BIn += 3; // Skip over handled chars ++BOut; } else { @@ -503,6 +481,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(tail); KEYWORD(target); KEYWORD(triple); + KEYWORD(unwind); KEYWORD(deplibs); KEYWORD(datalayout); KEYWORD(volatile); @@ -570,6 +549,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(noimplicitfloat); KEYWORD(naked); KEYWORD(nonlazybind); + KEYWORD(address_safety); KEYWORD(type); KEYWORD(opaque); @@ -596,6 +576,7 @@ lltok::Kind LLLexer::LexIdentifier() { if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ TyVal = LLVMTY; return lltok::Type; } TYPEKEYWORD("void", Type::getVoidTy(Context)); + TYPEKEYWORD("half", Type::getHalfTy(Context)); TYPEKEYWORD("float", Type::getFloatTy(Context)); TYPEKEYWORD("double", Type::getDoubleTy(Context)); TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context)); @@ -642,7 +623,6 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(indirectbr, IndirectBr); INSTKEYWORD(invoke, Invoke); INSTKEYWORD(resume, Resume); - INSTKEYWORD(unwind, Unwind); INSTKEYWORD(unreachable, Unreachable); INSTKEYWORD(alloca, Alloca); @@ -715,7 +695,7 @@ lltok::Kind LLLexer::Lex0x() { if (Kind == 'J') { // HexFPConstant - Floating point constant represented in IEEE format as a // hexadecimal number for when exponential notation is not precise enough. - // Float and double only. + // Half, Float, and double only. 
APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr))); return lltok::APFloat; } diff --git a/contrib/llvm/lib/AsmParser/LLLexer.h b/contrib/llvm/lib/AsmParser/LLLexer.h index 33b913572375..09aea5b01825 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.h +++ b/contrib/llvm/lib/AsmParser/LLLexer.h @@ -42,7 +42,6 @@ namespace llvm { APFloat APFloatVal; APSInt APSIntVal; - std::string TheError; public: explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C); diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index cafaab01afd9..068be3d47c33 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -120,11 +120,6 @@ bool LLParser::ValidateEndOfModule() { for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove - // Upgrade to new EH scheme. N.B. This will go away in 3.1. - UpgradeExceptionHandling(M); - - // Check debug info intrinsics. - CheckDebugInfoIntrinsics(M); return false; } @@ -879,7 +874,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { /// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind /// indicates what kind of attribute list this is: 0: function arg, 1: result, /// 2: function attr. -bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { +bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { Attrs = Attribute::None; LocTy AttrLoc = Lex.getLoc(); @@ -924,6 +919,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_naked: Attrs |= Attribute::Naked; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; + case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; case lltok::kw_alignstack: { unsigned Alignment; @@ -1047,13 +1043,11 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); - if (ParseUInt32(ArbitraryCC)) { + if (ParseUInt32(ArbitraryCC)) return true; - } else - CC = static_cast(ArbitraryCC); - return false; + CC = static_cast(ArbitraryCC); + return false; } - break; } Lex.Lex(); @@ -1069,7 +1063,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst, return TokError("expected metadata after comma"); std::string Name = Lex.getStrVal(); - unsigned MDK = M->getMDKindID(Name.c_str()); + unsigned MDK = M->getMDKindID(Name); Lex.Lex(); MDNode *Node; @@ -1358,8 +1352,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, // Parse the argument. 
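Several of the LLParser hunks above and below replace bare unsigned attribute masks with the Attributes type. The gain is type safety: the mask can still be OR-ed together and tested, but it no longer silently mixes with arbitrary integers. A simplified stand-in for what such a wrapper looks like, not the real llvm::Attributes definition:

#include <cstdint>

class AttrMask {
  uint64_t bits;
public:
  AttrMask() : bits(0) {}
  explicit AttrMask(uint64_t b) : bits(b) {}
  AttrMask &operator|=(AttrMask o) { bits |= o.bits; return *this; }
  friend AttrMask operator|(AttrMask a, AttrMask b) { return AttrMask(a.bits | b.bits); }
  bool operator==(AttrMask o) const { return bits == o.bits; }
  bool operator!=(AttrMask o) const { return bits != o.bits; }
  explicit operator bool() const { return bits != 0; }   // "if (Attrs)" style tests
  uint64_t raw() const { return bits; }                  // for on-disk encoding
};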
LocTy ArgLoc; Type *ArgTy = 0; - unsigned ArgAttrs1 = Attribute::None; - unsigned ArgAttrs2 = Attribute::None; + Attributes ArgAttrs1; + Attributes ArgAttrs2; Value *V; if (ParseType(ArgTy, ArgLoc)) return true; @@ -1399,7 +1393,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, } else { LocTy TypeLoc = Lex.getLoc(); Type *ArgTy = 0; - unsigned Attrs; + Attributes Attrs; std::string Name; if (ParseType(ArgTy) || @@ -1466,7 +1460,7 @@ bool LLParser::ParseFunctionType(Type *&Result) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) return Error(ArgList[i].Loc, "argument name invalid in function type"); - if (ArgList[i].Attrs != 0) + if (ArgList[i].Attrs) return Error(ArgList[i].Loc, "argument attributes invalid in function type"); } @@ -1612,7 +1606,8 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) { if ((unsigned)Size != Size) return Error(SizeLoc, "size too large for vector"); if (!VectorType::isValidElementType(EltTy)) - return Error(TypeLoc, "vector element type must be fp or integer"); + return Error(TypeLoc, + "vector element type must be fp, integer or a pointer to these types"); Result = VectorType::get(EltTy, unsigned(Size)); } else { if (!ArrayType::isValidElementType(EltTy)) @@ -1971,9 +1966,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return Error(ID.Loc, "constant vector must not be empty"); if (!Elts[0]->getType()->isIntegerTy() && - !Elts[0]->getType()->isFloatingPointTy()) + !Elts[0]->getType()->isFloatingPointTy() && + !Elts[0]->getType()->isPointerTy()) return Error(FirstEltLoc, - "vector elements must have integer or floating point type"); + "vector elements must have integer, pointer or floating point type"); // Verify that all the vector elements have the same type. 
for (unsigned i = 1, e = Elts.size(); i != e; ++i) @@ -2022,7 +2018,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } case lltok::kw_c: // c "foo" Lex.Lex(); - ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false); + ID.ConstantVal = ConstantDataArray::getString(Context, Lex.getStrVal(), + false); if (ParseToken(lltok::StringConstant, "expected string")) return true; ID.Kind = ValID::t_Constant; return false; @@ -2165,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->isPointerTy()) + !Val0->getType()->getScalarType()->isPointerTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -2299,7 +2296,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Opc == Instruction::GetElementPtr) { - if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy()) + if (Elts.size() == 0 || + !Elts[0]->getType()->getScalarType()->isPointerTy()) return Error(ID.Loc, "getelementptr requires pointer operand"); ArrayRef Indices(Elts.begin() + 1, Elts.end()); @@ -2440,7 +2438,6 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "functions are not values, refer to them as pointers"); switch (ID.Kind) { - default: llvm_unreachable("Unknown ValID!"); case ValID::t_LocalID: if (!PFS) return Error(ID.Loc, "invalid use of function-local name"); V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc); @@ -2485,13 +2482,16 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, !ConstantFP::isValueValidForType(Ty, ID.APFloatVal)) return Error(ID.Loc, "floating point constant invalid for type"); - // The lexer has no type info, so builds all float and double FP constants - // as double. Fix this here. Long double does not need this. - if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble && - Ty->isFloatTy()) { + // The lexer has no type info, so builds all half, float, and double FP + // constants as double. Fix this here. Long double does not need this. 
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble) { bool Ignored; - ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, - &Ignored); + if (Ty->isHalfTy()) + ID.APFloatVal.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, + &Ignored); + else if (Ty->isFloatTy()) + ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, + &Ignored); } V = ConstantFP::get(Context, ID.APFloatVal); @@ -2549,6 +2549,7 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "constant expression type mismatch"); return false; } + llvm_unreachable("Invalid ValID"); } bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) { @@ -2585,7 +2586,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { LocTy LinkageLoc = Lex.getLoc(); unsigned Linkage; - unsigned Visibility, RetAttrs; + unsigned Visibility; + Attributes RetAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc = Lex.getLoc(); @@ -2649,7 +2651,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { SmallVector ArgList; bool isVarArg; - unsigned FuncAttrs; + Attributes FuncAttrs; std::string Section; unsigned Alignment; std::string GC; @@ -2835,7 +2837,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { } switch (ParseInstruction(Inst, BB, PFS)) { - default: assert(0 && "Unknown ParseInstruction result!"); + default: llvm_unreachable("Unknown ParseInstruction result!"); case InstError: return true; case InstNormal: BB->getInstList().push_back(Inst); @@ -2881,7 +2883,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, switch (Token) { default: return Error(Loc, "expected instruction opcode"); // Terminator Instructions. - case lltok::kw_unwind: Inst = new UnwindInst(Context); return false; case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false; case lltok::kw_ret: return ParseRet(Inst, BB, PFS); case lltok::kw_br: return ParseBr(Inst, PFS); @@ -2953,19 +2954,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_tail: return ParseCall(Inst, PFS, true); // Memory. 
case lltok::kw_alloca: return ParseAlloc(Inst, PFS); - case lltok::kw_load: return ParseLoad(Inst, PFS, false); - case lltok::kw_store: return ParseStore(Inst, PFS, false); + case lltok::kw_load: return ParseLoad(Inst, PFS); + case lltok::kw_store: return ParseStore(Inst, PFS); case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS); case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS); case lltok::kw_fence: return ParseFence(Inst, PFS); - case lltok::kw_volatile: - // For compatibility; canonical location is after load - if (EatIfPresent(lltok::kw_load)) - return ParseLoad(Inst, PFS, true); - else if (EatIfPresent(lltok::kw_store)) - return ParseStore(Inst, PFS, true); - else - return TokError("expected 'load' or 'store'"); case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS); case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS); case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS); @@ -3169,7 +3162,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { /// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); - unsigned RetAttrs, FnAttrs; + Attributes RetAttrs, FnAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3342,7 +3335,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); if (!LHS->getType()->isIntOrIntVectorTy() && - !LHS->getType()->isPointerTy()) + !LHS->getType()->getScalarType()->isPointerTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -3462,7 +3455,7 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) { return true; if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2)) - return Error(Loc, "invalid extractelement operands"); + return Error(Loc, "invalid shufflevector operands"); Inst = new ShuffleVectorInst(Op0, Op1, Op2); return false; @@ -3568,7 +3561,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { /// ParameterList OptionalAttrs bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { - unsigned RetAttrs, FnAttrs; + Attributes RetAttrs, FnAttrs; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3689,10 +3682,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)? /// ::= 'load' 'atomic' 'volatile'? TypeAndValue /// 'singlethread'? AtomicOrdering (',' 'align' i32)? -/// Compatibility: -/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)? -int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, - bool isVolatile) { +int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; unsigned Alignment = 0; bool AteExtraComma = false; @@ -3701,15 +3691,12 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, SynchronizationScope Scope = CrossThread; if (Lex.getKind() == lltok::kw_atomic) { - if (isVolatile) - return TokError("mixing atomic with old volatile placement"); isAtomic = true; Lex.Lex(); } + bool isVolatile = false; if (Lex.getKind() == lltok::kw_volatile) { - if (isVolatile) - return TokError("duplicate volatile before and after store"); isVolatile = true; Lex.Lex(); } @@ -3736,10 +3723,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, /// ::= 'store' 'volatile'? 
TypeAndValue ',' TypeAndValue (',' 'align' i32)? /// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue /// 'singlethread'? AtomicOrdering (',' 'align' i32)? -/// Compatibility: -/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)? -int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, - bool isVolatile) { +int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Value *Val, *Ptr; LocTy Loc, PtrLoc; unsigned Alignment = 0; bool AteExtraComma = false; @@ -3748,15 +3732,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, SynchronizationScope Scope = CrossThread; if (Lex.getKind() == lltok::kw_atomic) { - if (isVolatile) - return TokError("mixing atomic with old volatile placement"); isAtomic = true; Lex.Lex(); } + bool isVolatile = false; if (Lex.getKind() == lltok::kw_volatile) { - if (isVolatile) - return TokError("duplicate volatile before and after store"); isVolatile = true; Lex.Lex(); } @@ -3902,13 +3883,15 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { /// ParseGetElementPtr /// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)* int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { - Value *Ptr, *Val; LocTy Loc, EltLoc; + Value *Ptr = 0; + Value *Val = 0; + LocTy Loc, EltLoc; bool InBounds = EatIfPresent(lltok::kw_inbounds); if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; - if (!Ptr->getType()->isPointerTy()) + if (!Ptr->getType()->getScalarType()->isPointerTy()) return Error(Loc, "base of getelementptr must be a pointer"); SmallVector Indices; @@ -3919,11 +3902,23 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!Val->getType()->isIntegerTy()) + if (!Val->getType()->getScalarType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); + if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy()) + return Error(EltLoc, "getelementptr index type missmatch"); + if (Val->getType()->isVectorTy()) { + unsigned ValNumEl = cast(Val->getType())->getNumElements(); + unsigned PtrNumEl = cast(Ptr->getType())->getNumElements(); + if (ValNumEl != PtrNumEl) + return Error(EltLoc, + "getelementptr vector index has a wrong number of elements"); + } Indices.push_back(Val); } + if (Val && Val->getType()->isVectorTy() && Indices.size() != 1) + return Error(EltLoc, "vector getelementptrs must have a single index"); + if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ptr, Indices); diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h index cbc3c23e8631..dda880838117 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.h +++ b/contrib/llvm/lib/AsmParser/LLParser.h @@ -15,6 +15,7 @@ #define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" +#include "llvm/Attributes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Type.h" @@ -171,7 +172,7 @@ namespace llvm { return ParseUInt32(Val); } bool ParseOptionalAddrSpace(unsigned &AddrSpace); - bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind); + bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); bool ParseOptionalLinkage(unsigned &Linkage) { bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); @@ -304,8 +305,8 @@ namespace llvm { 
struct ParamInfo { LocTy Loc; Value *V; - unsigned Attrs; - ParamInfo(LocTy loc, Value *v, unsigned attrs) + Attributes Attrs; + ParamInfo(LocTy loc, Value *v, Attributes attrs) : Loc(loc), V(v), Attrs(attrs) {} }; bool ParseParameterList(SmallVectorImpl &ArgList, @@ -325,9 +326,9 @@ namespace llvm { struct ArgInfo { LocTy Loc; Type *Ty; - unsigned Attrs; + Attributes Attrs; std::string Name; - ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N) + ArgInfo(LocTy L, Type *ty, Attributes Attr, const std::string &N) : Loc(L), Ty(ty), Attrs(Attr), Name(N) {} }; bool ParseArgumentList(SmallVectorImpl &ArgList, bool &isVarArg); @@ -363,8 +364,8 @@ namespace llvm { bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS); bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); int ParseAlloc(Instruction *&I, PerFunctionState &PFS); - int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); - int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + int ParseLoad(Instruction *&I, PerFunctionState &PFS); + int ParseStore(Instruction *&I, PerFunctionState &PFS); int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS); int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS); int ParseFence(Instruction *&I, PerFunctionState &PFS); diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index 8f167725ed58..adf5d4f4d0f9 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -50,6 +50,7 @@ namespace lltok { kw_tail, kw_target, kw_triple, + kw_unwind, kw_deplibs, kw_datalayout, kw_volatile, @@ -102,6 +103,7 @@ namespace lltok { kw_noimplicitfloat, kw_naked, kw_nonlazybind, + kw_address_safety, kw_type, kw_opaque, @@ -126,7 +128,7 @@ namespace lltok { kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter, - kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_resume, + kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume, kw_unreachable, kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw, diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp index 59fb471f2b93..21b7fd411e3d 100644 --- a/contrib/llvm/lib/AsmParser/Parser.cpp +++ b/contrib/llvm/lib/AsmParser/Parser.cpp @@ -44,7 +44,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { OwningPtr File; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { - Err = SMDiagnostic(Filename, + Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); return 0; } diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 46565f36af16..e3990403bd71 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -22,11 +22,19 @@ #include "llvm/AutoUpgrade.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataStream.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/OperandTraits.h" using namespace llvm; +void BitcodeReader::materializeForwardReferencedFunctions() { + while (!BlockAddrFwdRefs.empty()) { + Function *F = BlockAddrFwdRefs.begin()->first; + F->Materialize(); + } +} + void BitcodeReader::FreeState() { if (BufferOwned) delete Buffer; @@ -394,7 +402,7 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { // The type table size is 
always specified correctly. if (ID >= TypeList.size()) return 0; - + if (Type *Ty = TypeList[ID]) return Ty; @@ -403,14 +411,6 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { return TypeList[ID] = StructType::create(Context); } -/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable. -Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) { - if (ID >= TypeList.size()) - TypeList.resize(ID+1); - - return TypeList[ID]; -} - //===----------------------------------------------------------------------===// // Functions for parsing blocks from the bitcode file @@ -462,8 +462,8 @@ bool BitcodeReader::ParseAttributeBlock() { // If Function attributes are using index 0 then transfer them // to index ~0. Index 0 is used for return value attributes but used to be // used for function attributes. - Attributes RetAttribute = Attribute::None; - Attributes FnAttribute = Attribute::None; + Attributes RetAttribute; + Attributes FnAttribute; for (unsigned i = 0, e = Record.size(); i != e; i += 2) { // FIXME: remove in LLVM 3.0 // The alignment is stored as a 16-bit raw value from bits 31--16. @@ -473,23 +473,24 @@ bool BitcodeReader::ParseAttributeBlock() { if (Alignment && !isPowerOf2_32(Alignment)) return Error("Alignment is not a power of two."); - Attributes ReconstitutedAttr = Record[i+1] & 0xffff; + Attributes ReconstitutedAttr(Record[i+1] & 0xffff); if (Alignment) ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment); - ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11; - Record[i+1] = ReconstitutedAttr; + ReconstitutedAttr |= + Attributes((Record[i+1] & (0xffffull << 32)) >> 11); + Record[i+1] = ReconstitutedAttr.Raw(); if (Record[i] == 0) - RetAttribute = Record[i+1]; + RetAttribute = ReconstitutedAttr; else if (Record[i] == ~0U) - FnAttribute = Record[i+1]; + FnAttribute = ReconstitutedAttr; } - unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn| + Attributes OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn| Attribute::ReadOnly|Attribute::ReadNone); if (FnAttribute == Attribute::None && RetAttribute != Attribute::None && - (RetAttribute & OldRetAttrs) != 0) { + (RetAttribute & OldRetAttrs)) { if (FnAttribute == Attribute::None) { // add a slot so they get added. Record.push_back(~0U); Record.push_back(0); @@ -506,8 +507,9 @@ bool BitcodeReader::ParseAttributeBlock() { } else if (Record[i] == ~0U) { if (FnAttribute != Attribute::None) Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute)); - } else if (Record[i+1] != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1])); + } else if (Attributes(Record[i+1]) != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(Record[i], + Attributes(Record[i+1]))); } MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end())); @@ -521,7 +523,7 @@ bool BitcodeReader::ParseAttributeBlock() { bool BitcodeReader::ParseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return Error("Malformed block record"); - + return ParseTypeTableBody(); } @@ -533,7 +535,7 @@ bool BitcodeReader::ParseTypeTableBody() { unsigned NumRecords = 0; SmallString<64> TypeName; - + // Read all the records for this type table. 
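The type-table hunks above keep reading the old FUNCTION record, which carried a dead attribute-id field, under TYPE_CODE_FUNCTION_OLD and add a new layout without it, so only the offsets of the return type and the first parameter differ. A sketch of decoding both layouts, using plain integer type IDs in place of resolved Type pointers:

#include <cstdint>
#include <vector>

struct FnTypeRecord {
  bool varArg;
  uint64_t retTypeID;
  std::vector<uint64_t> paramTypeIDs;
};

// old layout: [vararg, attrid, retty, paramty x N]
// new layout: [vararg, retty, paramty x N]
bool decodeFunctionTypeRecord(const std::vector<uint64_t> &record,
                              bool oldLayout, FnTypeRecord &out) {
  const size_t retIdx = oldLayout ? 2 : 1;
  if (record.size() < retIdx + 1)
    return false;                 // matches the "Invalid FUNCTION type record" checks
  out.varArg = record[0] != 0;
  out.retTypeID = record[retIdx];
  out.paramTypeIDs.assign(record.begin() + retIdx + 1, record.end());
  return true;
}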
while (1) { unsigned Code = Stream.ReadCode(); @@ -573,6 +575,9 @@ bool BitcodeReader::ParseTypeTableBody() { case bitc::TYPE_CODE_VOID: // VOID ResultTy = Type::getVoidTy(Context); break; + case bitc::TYPE_CODE_HALF: // HALF + ResultTy = Type::getHalfTy(Context); + break; case bitc::TYPE_CODE_FLOAT: // FLOAT ResultTy = Type::getFloatTy(Context); break; @@ -615,12 +620,12 @@ bool BitcodeReader::ParseTypeTableBody() { ResultTy = PointerType::get(ResultTy, AddressSpace); break; } - case bitc::TYPE_CODE_FUNCTION: { + case bitc::TYPE_CODE_FUNCTION_OLD: { // FIXME: attrid is dead, remove it in LLVM 3.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) return Error("Invalid FUNCTION type record"); - std::vector ArgTys; + SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) ArgTys.push_back(T); @@ -635,10 +640,29 @@ bool BitcodeReader::ParseTypeTableBody() { ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } + case bitc::TYPE_CODE_FUNCTION: { + // FUNCTION: [vararg, retty, paramty x N] + if (Record.size() < 2) + return Error("Invalid FUNCTION type record"); + SmallVector ArgTys; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + ArgTys.push_back(T); + else + break; + } + + ResultTy = getTypeByID(Record[1]); + if (ResultTy == 0 || ArgTys.size() < Record.size()-2) + return Error("invalid type in function type"); + + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return Error("Invalid STRUCT type record"); - std::vector EltTys; + SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); @@ -728,247 +752,6 @@ bool BitcodeReader::ParseTypeTableBody() { } } -// FIXME: Remove in LLVM 3.1 -bool BitcodeReader::ParseOldTypeTable() { - if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD)) - return Error("Malformed block record"); - - if (!TypeList.empty()) - return Error("Multiple TYPE_BLOCKs found!"); - - - // While horrible, we have no good ordering of types in the bc file. Just - // iteratively parse types out of the bc file in multiple passes until we get - // them all. Do this by saving a cursor for the start of the type block. - BitstreamCursor StartOfTypeBlockCursor(Stream); - - unsigned NumTypesRead = 0; - - SmallVector Record; -RestartScan: - unsigned NextTypeID = 0; - bool ReadAnyTypes = false; - - // Read all the records for this type table. - while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (NextTypeID != TypeList.size()) - return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD"); - - // If we haven't read all of the types yet, iterate again. - if (NumTypesRead != TypeList.size()) { - // If we didn't successfully read any types in this pass, then we must - // have an unhandled forward reference. - if (!ReadAnyTypes) - return Error("Obsolete bitcode contains unhandled recursive type"); - - Stream = StartOfTypeBlockCursor; - goto RestartScan; - } - - if (Stream.ReadBlockEnd()) - return Error("Error at end of type table block"); - return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. 
- Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - - // Read a record. - Record.clear(); - Type *ResultTy = 0; - switch (Stream.ReadRecord(Code, Record)) { - default: return Error("unknown type in type table"); - case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] - // TYPE_CODE_NUMENTRY contains a count of the number of types in the - // type list. This allows us to reserve space. - if (Record.size() < 1) - return Error("Invalid TYPE_CODE_NUMENTRY record"); - TypeList.resize(Record[0]); - continue; - case bitc::TYPE_CODE_VOID: // VOID - ResultTy = Type::getVoidTy(Context); - break; - case bitc::TYPE_CODE_FLOAT: // FLOAT - ResultTy = Type::getFloatTy(Context); - break; - case bitc::TYPE_CODE_DOUBLE: // DOUBLE - ResultTy = Type::getDoubleTy(Context); - break; - case bitc::TYPE_CODE_X86_FP80: // X86_FP80 - ResultTy = Type::getX86_FP80Ty(Context); - break; - case bitc::TYPE_CODE_FP128: // FP128 - ResultTy = Type::getFP128Ty(Context); - break; - case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 - ResultTy = Type::getPPC_FP128Ty(Context); - break; - case bitc::TYPE_CODE_LABEL: // LABEL - ResultTy = Type::getLabelTy(Context); - break; - case bitc::TYPE_CODE_METADATA: // METADATA - ResultTy = Type::getMetadataTy(Context); - break; - case bitc::TYPE_CODE_X86_MMX: // X86_MMX - ResultTy = Type::getX86_MMXTy(Context); - break; - case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] - if (Record.size() < 1) - return Error("Invalid Integer type record"); - ResultTy = IntegerType::get(Context, Record[0]); - break; - case bitc::TYPE_CODE_OPAQUE: // OPAQUE - if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0) - ResultTy = StructType::create(Context); - break; - case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD - if (NextTypeID >= TypeList.size()) break; - // If we already read it, don't reprocess. - if (TypeList[NextTypeID] && - !cast(TypeList[NextTypeID])->isOpaque()) - break; - - // Set a type. - if (TypeList[NextTypeID] == 0) - TypeList[NextTypeID] = StructType::create(Context); - - std::vector EltTys; - for (unsigned i = 1, e = Record.size(); i != e; ++i) { - if (Type *Elt = getTypeByIDOrNull(Record[i])) - EltTys.push_back(Elt); - else - break; - } - - if (EltTys.size() != Record.size()-1) - break; // Not all elements are ready. - - cast(TypeList[NextTypeID])->setBody(EltTys, Record[0]); - ResultTy = TypeList[NextTypeID]; - TypeList[NextTypeID] = 0; - break; - } - case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or - // [pointee type, address space] - if (Record.size() < 1) - return Error("Invalid POINTER type record"); - unsigned AddressSpace = 0; - if (Record.size() == 2) - AddressSpace = Record[1]; - if ((ResultTy = getTypeByIDOrNull(Record[0]))) - ResultTy = PointerType::get(ResultTy, AddressSpace); - break; - } - case bitc::TYPE_CODE_FUNCTION: { - // FIXME: attrid is dead, remove it in LLVM 3.0 - // FUNCTION: [vararg, attrid, retty, paramty x N] - if (Record.size() < 3) - return Error("Invalid FUNCTION type record"); - std::vector ArgTys; - for (unsigned i = 3, e = Record.size(); i != e; ++i) { - if (Type *Elt = getTypeByIDOrNull(Record[i])) - ArgTys.push_back(Elt); - else - break; - } - if (ArgTys.size()+3 != Record.size()) - break; // Something was null. 
- if ((ResultTy = getTypeByIDOrNull(Record[2]))) - ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); - break; - } - case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] - if (Record.size() < 2) - return Error("Invalid ARRAY type record"); - if ((ResultTy = getTypeByIDOrNull(Record[1]))) - ResultTy = ArrayType::get(ResultTy, Record[0]); - break; - case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] - if (Record.size() < 2) - return Error("Invalid VECTOR type record"); - if ((ResultTy = getTypeByIDOrNull(Record[1]))) - ResultTy = VectorType::get(ResultTy, Record[0]); - break; - } - - if (NextTypeID >= TypeList.size()) - return Error("invalid TYPE table"); - - if (ResultTy && TypeList[NextTypeID] == 0) { - ++NumTypesRead; - ReadAnyTypes = true; - - TypeList[NextTypeID] = ResultTy; - } - - ++NextTypeID; - } -} - - -bool BitcodeReader::ParseOldTypeSymbolTable() { - if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD)) - return Error("Malformed block record"); - - SmallVector Record; - - // Read all the records for this type table. - std::string TypeName; - while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of type symbol table block"); - return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - - // Read a record. - Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { - default: // Default behavior: unknown type. - break; - case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N] - if (ConvertToString(Record, 1, TypeName)) - return Error("Invalid TST_ENTRY record"); - unsigned TypeID = Record[0]; - if (TypeID >= TypeList.size()) - return Error("Invalid Type ID in TST_ENTRY record"); - - // Only apply the type name to a struct type with no name. 
- if (StructType *STy = dyn_cast(TypeList[TypeID])) - if (!STy->isLiteral() && !STy->hasName()) - STy->setName(TypeName); - TypeName.clear(); - break; - } - } -} - bool BitcodeReader::ParseValueSymbolTable() { if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return Error("Malformed block record"); @@ -1262,7 +1045,9 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return Error("Invalid FLOAT record"); - if (CurTy->isFloatTy()) + if (CurTy->isHalfTy()) + V = ConstantFP::get(Context, APFloat(APInt(16, (uint16_t)Record[0]))); + else if (CurTy->isFloatTy()) V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0]))); else if (CurTy->isDoubleTy()) V = ConstantFP::get(Context, APFloat(APInt(64, Record[0]))); @@ -1286,7 +1071,7 @@ bool BitcodeReader::ParseConstants() { return Error("Invalid CST_AGGREGATE record"); unsigned Size = Record.size(); - std::vector Elts; + SmallVector Elts; if (StructType *STy = dyn_cast(CurTy)) { for (unsigned i = 0; i != Size; ++i) @@ -1308,35 +1093,78 @@ bool BitcodeReader::ParseConstants() { } break; } - case bitc::CST_CODE_STRING: { // STRING: [values] - if (Record.empty()) - return Error("Invalid CST_AGGREGATE record"); - - ArrayType *ATy = cast(CurTy); - Type *EltTy = ATy->getElementType(); - - unsigned Size = Record.size(); - std::vector Elts; - for (unsigned i = 0; i != Size; ++i) - Elts.push_back(ConstantInt::get(EltTy, Record[i])); - V = ConstantArray::get(ATy, Elts); - break; - } + case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) - return Error("Invalid CST_AGGREGATE record"); - - ArrayType *ATy = cast(CurTy); - Type *EltTy = ATy->getElementType(); + return Error("Invalid CST_STRING record"); unsigned Size = Record.size(); - std::vector Elts; + SmallString<16> Elts; for (unsigned i = 0; i != Size; ++i) - Elts.push_back(ConstantInt::get(EltTy, Record[i])); - Elts.push_back(Constant::getNullValue(EltTy)); - V = ConstantArray::get(ATy, Elts); + Elts.push_back(Record[i]); + V = ConstantDataArray::getString(Context, Elts, + BitCode == bitc::CST_CODE_CSTRING); break; } + case bitc::CST_CODE_DATA: {// DATA: [n x value] + if (Record.empty()) + return Error("Invalid CST_DATA record"); + + Type *EltTy = cast(CurTy)->getElementType(); + unsigned Size = Record.size(); + + if (EltTy->isIntegerTy(8)) { + SmallVector Elts(Record.begin(), Record.end()); + if (isa(CurTy)) + V = ConstantDataVector::get(Context, Elts); + else + V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isIntegerTy(16)) { + SmallVector Elts(Record.begin(), Record.end()); + if (isa(CurTy)) + V = ConstantDataVector::get(Context, Elts); + else + V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isIntegerTy(32)) { + SmallVector Elts(Record.begin(), Record.end()); + if (isa(CurTy)) + V = ConstantDataVector::get(Context, Elts); + else + V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isIntegerTy(64)) { + SmallVector Elts(Record.begin(), Record.end()); + if (isa(CurTy)) + V = ConstantDataVector::get(Context, Elts); + else + V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isFloatTy()) { + SmallVector Elts; + for (unsigned i = 0; i != Size; ++i) { + union { uint32_t I; float F; }; + I = Record[i]; + Elts.push_back(F); + } + if (isa(CurTy)) + V = ConstantDataVector::get(Context, Elts); + else + V = ConstantDataArray::get(Context, Elts); + } else if (EltTy->isDoubleTy()) { + SmallVector Elts; + for (unsigned i = 0; 
i != Size; ++i) { + union { uint64_t I; double F; }; + I = Record[i]; + Elts.push_back(F); + } + if (isa(CurTy)) + V = ConstantDataVector::get(Context, Elts); + else + V = ConstantDataArray::get(Context, Elts); + } else { + return Error("Unknown element type in CE_DATA"); + } + break; + } + case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) return Error("Invalid CE_BINOP record"); int Opc = GetDecodedBinaryOpcode(Record[0], CurTy); @@ -1517,6 +1345,50 @@ bool BitcodeReader::ParseConstants() { return false; } +bool BitcodeReader::ParseUseLists() { + if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of use-list table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a use list record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + break; + case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. + unsigned RecordLength = Record.size(); + if (RecordLength < 1) + return Error ("Invalid UseList reader!"); + UseListRecords.push_back(Record); + break; + } + } + } +} + /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. @@ -1538,8 +1410,36 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { return false; } -bool BitcodeReader::ParseModule() { - if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) +bool BitcodeReader::GlobalCleanup() { + // Patch the initializers for globals and aliases up. + ResolveGlobalAndAliasInits(); + if (!GlobalInits.empty() || !AliasInits.empty()) + return Error("Malformed global initializer set"); + + // Look for intrinsic functions which need to be upgraded at some point + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + Function *NewFn; + if (UpgradeIntrinsicFunction(FI, NewFn)) + UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); + } + + // Look for global variables which need to be renamed. + for (Module::global_iterator + GI = TheModule->global_begin(), GE = TheModule->global_end(); + GI != GE; ++GI) + UpgradeGlobalVariable(GI); + // Force deallocation of memory for these vectors to favor the client that + // want lazy deserialization. + std::vector >().swap(GlobalInits); + std::vector >().swap(AliasInits); + return false; +} + +bool BitcodeReader::ParseModule(bool Resume) { + if (Resume) + Stream.JumpToBit(NextUnreadBit); + else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Malformed block record"); SmallVector Record; @@ -1553,33 +1453,7 @@ bool BitcodeReader::ParseModule() { if (Stream.ReadBlockEnd()) return Error("Error at end of module block"); - // Patch the initializers for globals and aliases up. 
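In the CST_CODE_DATA case above, each record element carries the raw bits of one array or vector element, and the float/double branches reinterpret those bits through a union. A standalone sketch of the float branch; std::memcpy is used here for the bit cast in place of the union:

#include <cstdint>
#include <cstring>
#include <vector>

// Each 64-bit record word holds the IEEE-754 bits of one float element.
std::vector<float> decodeFloatData(const std::vector<uint64_t> &record) {
  std::vector<float> elts;
  elts.reserve(record.size());
  for (uint64_t word : record) {
    uint32_t bits = static_cast<uint32_t>(word);
    float f;
    std::memcpy(&f, &bits, sizeof f);   // reinterpret the bits as a float
    elts.push_back(f);
  }
  return elts;
}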
- ResolveGlobalAndAliasInits(); - if (!GlobalInits.empty() || !AliasInits.empty()) - return Error("Malformed global initializer set"); - if (!FunctionsWithBodies.empty()) - return Error("Too few function bodies found"); - - // Look for intrinsic functions which need to be upgraded at some point - for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); - FI != FE; ++FI) { - Function* NewFn; - if (UpgradeIntrinsicFunction(FI, NewFn)) - UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); - } - - // Look for global variables which need to be renamed. - for (Module::global_iterator - GI = TheModule->global_begin(), GE = TheModule->global_end(); - GI != GE; ++GI) - UpgradeGlobalVariable(GI); - - // Force deallocation of memory for these vectors to favor the client that - // want lazy deserialization. - std::vector >().swap(GlobalInits); - std::vector >().swap(AliasInits); - std::vector().swap(FunctionsWithBodies); - return false; + return GlobalCleanup(); } if (Code == bitc::ENTER_SUBBLOCK) { @@ -1600,17 +1474,10 @@ bool BitcodeReader::ParseModule() { if (ParseTypeTable()) return true; break; - case bitc::TYPE_BLOCK_ID_OLD: - if (ParseOldTypeTable()) - return true; - break; - case bitc::TYPE_SYMTAB_BLOCK_ID_OLD: - if (ParseOldTypeSymbolTable()) - return true; - break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (ParseValueSymbolTable()) return true; + SeenValueSymbolTable = true; break; case bitc::CONSTANTS_BLOCK_ID: if (ParseConstants() || ResolveGlobalAndAliasInits()) @@ -1623,13 +1490,29 @@ bool BitcodeReader::ParseModule() { case bitc::FUNCTION_BLOCK_ID: // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. - if (!HasReversedFunctionsWithBodies) { + if (!SeenFirstFunctionBody) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); - HasReversedFunctionsWithBodies = true; + if (GlobalCleanup()) + return true; + SeenFirstFunctionBody = true; } if (RememberAndSkipFunctionBody()) return true; + // For streaming bitcode, suspend parsing when we reach the function + // bodies. Subsequent materialization calls will resume it when + // necessary. For streaming, the function bodies must be at the end of + // the bitcode. If the bitcode file is old, the symbol table will be + // at the end instead and will not have been seen yet. In this case, + // just finish the parse now. + if (LazyStreamer && SeenValueSymbolTable) { + NextUnreadBit = Stream.GetCurrentBitNo(); + return false; + } + break; + case bitc::USELIST_BLOCK_ID: + if (ParseUseLists()) + return true; break; } continue; @@ -1784,8 +1667,10 @@ bool BitcodeReader::ParseModule() { // If this is a function with a body, remember the prototype we are // creating now, so that we can match up the body with them later. - if (!isProto) + if (!isProto) { FunctionsWithBodies.push_back(Func); + if (LazyStreamer) DeferredFunctionInfo[Func] = 0; + } break; } // ALIAS: [alias type, aliasee val#, linkage] @@ -1824,24 +1709,7 @@ bool BitcodeReader::ParseModule() { bool BitcodeReader::ParseBitcodeInto(Module *M) { TheModule = 0; - unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); - unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); - - if (Buffer->getBufferSize() & 3) { - if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd)) - return Error("Invalid bitcode signature"); - else - return Error("Bitcode stream should be a multiple of 4 bytes in length"); - } - - // If we have a wrapper header, parse it and ignore the non-bc file contents. 
- // The magic number is 0x0B17C0DE stored in little endian. - if (isBitcodeWrapper(BufPtr, BufEnd)) - if (SkipBitcodeWrapperHeader(BufPtr, BufEnd)) - return Error("Invalid bitcode wrapper header"); - - StreamFile.init(BufPtr, BufEnd); - Stream.init(StreamFile); + if (InitStream()) return true; // Sniff for the signature. if (Stream.Read(8) != 'B' || @@ -1883,8 +1751,9 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { if (TheModule) return Error("Multiple MODULE_BLOCKs in same stream"); TheModule = M; - if (ParseModule()) + if (ParseModule(false)) return true; + if (LazyStreamer) return false; break; default: if (Stream.SkipBlock()) @@ -1952,20 +1821,7 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { } bool BitcodeReader::ParseTriple(std::string &Triple) { - if (Buffer->getBufferSize() & 3) - return Error("Bitcode stream should be a multiple of 4 bytes in length"); - - unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); - unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); - - // If we have a wrapper header, parse it and ignore the non-bc file contents. - // The magic number is 0x0B17C0DE stored in little endian. - if (isBitcodeWrapper(BufPtr, BufEnd)) - if (SkipBitcodeWrapperHeader(BufPtr, BufEnd)) - return Error("Invalid bitcode wrapper header"); - - StreamFile.init(BufPtr, BufEnd); - Stream.init(StreamFile); + if (InitStream()) return true; // Sniff for the signature. if (Stream.Read(8) != 'B' || @@ -2517,10 +2373,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_UNWIND: // UNWIND - I = new UnwindInst(Context); - InstructionList.push_back(I); - break; case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE I = new UnreachableInst(Context); InstructionList.push_back(I); @@ -2845,6 +2697,19 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return false; } +/// FindFunctionInStream - Find the function body in the bitcode stream +bool BitcodeReader::FindFunctionInStream(Function *F, + DenseMap::iterator DeferredFunctionInfoIterator) { + while (DeferredFunctionInfoIterator->second == 0) { + if (Stream.AtEndOfStream()) + return Error("Could not find Function in stream"); + // ParseModule will parse the next body in the stream and set its + // position in the DeferredFunctionInfo map. + if (ParseModule(true)) return true; + } + return false; +} + //===----------------------------------------------------------------------===// // GVMaterializer implementation //===----------------------------------------------------------------------===// @@ -2865,6 +2730,10 @@ bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { DenseMap::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); + // If its position is recorded as 0, its body is somewhere in the stream + // but we haven't seen it yet. + if (DFII->second == 0) + if (LazyStreamer && FindFunctionInStream(F, DFII)) return true; // Move the bit stream to the saved position of the deferred function body. Stream.JumpToBit(DFII->second); @@ -2920,6 +2789,12 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { Materialize(F, ErrInfo)) return true; + // At this point, if there are any function bodies, the current bit is + // pointing to the END_BLOCK record after them. Now make sure the rest + // of the bits in the module have been read. 
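The streaming changes above record a bit position of 0 in DeferredFunctionInfo for a prototype whose body has not yet been reached; Materialize then calls FindFunctionInStream, which keeps resuming ParseModule until that position is filled in. A toy model of the control flow, with the caller supplying the "parse a bit more of the stream" step:

#include <cstdint>
#include <functional>
#include <map>
#include <string>

// deferred: function name -> bit position of its body (0 = not seen yet).
bool materializeFunction(std::map<std::string, uint64_t> &deferred,
                         const std::string &fn,
                         const std::function<bool()> &parseMoreOfStream) {
  auto it = deferred.find(fn);
  if (it == deferred.end())
    return false;                 // not a deferred function
  while (it->second == 0)         // body position not recorded yet
    if (!parseMoreOfStream())     // resumes parsing and may fill positions in
      return false;               // stream ended before the body appeared
  // A real reader would now jump to it->second and decode the body.
  return true;
}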
+ if (NextUnreadBit) + ParseModule(true); + // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire // module is materialized because there could always be another function body @@ -2939,15 +2814,60 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { } std::vector >().swap(UpgradedIntrinsics); - // Upgrade to new EH scheme. N.B. This will go away in 3.1. - UpgradeExceptionHandling(M); + return false; +} - // Check debug info intrinsics. - CheckDebugInfoIntrinsics(TheModule); +bool BitcodeReader::InitStream() { + if (LazyStreamer) return InitLazyStream(); + return InitStreamFromBuffer(); +} + +bool BitcodeReader::InitStreamFromBuffer() { + const unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) { + if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd)) + return Error("Invalid bitcode signature"); + else + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + } + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) + return Error("Invalid bitcode wrapper header"); + + StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); + Stream.init(*StreamFile); return false; } +bool BitcodeReader::InitLazyStream() { + // Check and strip off the bitcode wrapper; BitstreamReader expects never to + // see it. + StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer); + StreamFile.reset(new BitstreamReader(Bytes)); + Stream.init(*StreamFile); + + unsigned char buf[16]; + if (Bytes->readBytes(0, 16, buf, NULL) == -1) + return Error("Bitcode stream must be at least 16 bytes in length"); + + if (!isBitcode(buf, buf + 16)) + return Error("Invalid bitcode signature"); + + if (isBitcodeWrapper(buf, buf + 4)) { + const unsigned char *bitcodeStart = buf; + const unsigned char *bitcodeEnd = buf + 16; + SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false); + Bytes->dropLeadingBytes(bitcodeStart - buf); + Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart); + } + return false; +} //===----------------------------------------------------------------------===// // External interface @@ -2970,6 +2890,27 @@ Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, } // Have the BitcodeReader dtor delete 'Buffer'. R->setBufferOwned(true); + + R->materializeForwardReferencedFunctions(); + + return M; +} + + +Module *llvm::getStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg) { + Module *M = new Module(name, Context); + BitcodeReader *R = new BitcodeReader(streamer, Context); + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + delete M; // Also deletes R. + return 0; + } + R->setBufferOwned(false); // no buffer to delete return M; } @@ -2990,6 +2931,9 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, return 0; } + // TODO: Restore the use-lists to the in-memory state when the bitcode was + // written. We must defer until the Module has been fully materialized. 
+ return M; } diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h index 6e6118cac0dc..e7c4e94f785f 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h @@ -126,8 +126,11 @@ class BitcodeReader : public GVMaterializer { Module *TheModule; MemoryBuffer *Buffer; bool BufferOwned; - BitstreamReader StreamFile; + OwningPtr StreamFile; BitstreamCursor Stream; + DataStreamer *LazyStreamer; + uint64_t NextUnreadBit; + bool SeenValueSymbolTable; const char *ErrorString; @@ -135,6 +138,7 @@ class BitcodeReader : public GVMaterializer { BitcodeReaderValueList ValueList; BitcodeReaderMDValueList MDValueList; SmallVector InstructionList; + SmallVector, 64> UseListRecords; std::vector > GlobalInits; std::vector > AliasInits; @@ -160,9 +164,10 @@ class BitcodeReader : public GVMaterializer { // Map the bitcode's custom MDKind ID to the Module's MDKind ID. DenseMap MDKindMap; - // After the module header has been read, the FunctionsWithBodies list is - // reversed. This keeps track of whether we've done this yet. - bool HasReversedFunctionsWithBodies; + // Several operations happen after the module header has been read, but + // before function bodies are processed. This keeps track of whether + // we've done this yet. + bool SeenFirstFunctionBody; /// DeferredFunctionInfo - When function bodies are initially scanned, this /// map contains info about where to find deferred function body in the @@ -177,13 +182,22 @@ class BitcodeReader : public GVMaterializer { public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), - ErrorString(0), ValueList(C), MDValueList(C) { - HasReversedFunctionsWithBodies = false; + LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false), + ErrorString(0), ValueList(C), MDValueList(C), + SeenFirstFunctionBody(false) { + } + explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) + : Context(C), TheModule(0), Buffer(0), BufferOwned(false), + LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), + ErrorString(0), ValueList(C), MDValueList(C), + SeenFirstFunctionBody(false) { } ~BitcodeReader() { FreeState(); } - + + void materializeForwardReferencedFunctions(); + void FreeState(); /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer @@ -211,7 +225,6 @@ class BitcodeReader : public GVMaterializer { bool ParseTriple(std::string &Triple); private: Type *getTypeByID(unsigned ID); - Type *getTypeByIDOrNull(unsigned ID); Value *getFnValueByID(unsigned ID, Type *Ty) { if (Ty && Ty->isMetadataTy()) return MDValueList.getValueFwdRef(ID); @@ -256,21 +269,26 @@ class BitcodeReader : public GVMaterializer { } - bool ParseModule(); + bool ParseModule(bool Resume); bool ParseAttributeBlock(); bool ParseTypeTable(); - bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1 bool ParseTypeTableBody(); - bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1 bool ParseValueSymbolTable(); bool ParseConstants(); bool RememberAndSkipFunctionBody(); bool ParseFunctionBody(Function *F); + bool GlobalCleanup(); bool ResolveGlobalAndAliasInits(); bool ParseMetadata(); bool ParseMetadataAttachment(); bool ParseModuleTriple(std::string &Triple); + bool ParseUseLists(); + bool InitStream(); + bool InitStreamFromBuffer(); + bool InitLazyStream(); + bool FindFunctionInStream(Function *F, + DenseMap::iterator DeferredFunctionInfoIterator); }; } // End llvm 
namespace diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 5b3d96953a0e..b25d2e96d594 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -23,6 +23,7 @@ #include "llvm/Operator.h" #include "llvm/ValueSymbolTable.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -31,6 +32,12 @@ #include using namespace llvm; +static cl::opt +EnablePreserveUseListOrdering("enable-bc-uselist-preserve", + cl::desc("Turn on experimental support for " + "use-list order preservation."), + cl::init(false), cl::Hidden); + /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { @@ -119,7 +126,6 @@ static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) { static unsigned GetEncodedOrdering(AtomicOrdering Ordering) { switch (Ordering) { - default: llvm_unreachable("Unknown atomic ordering"); case NotAtomic: return bitc::ORDERING_NOTATOMIC; case Unordered: return bitc::ORDERING_UNORDERED; case Monotonic: return bitc::ORDERING_MONOTONIC; @@ -128,14 +134,15 @@ static unsigned GetEncodedOrdering(AtomicOrdering Ordering) { case AcquireRelease: return bitc::ORDERING_ACQREL; case SequentiallyConsistent: return bitc::ORDERING_SEQCST; } + llvm_unreachable("Invalid ordering"); } static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) { switch (SynchScope) { - default: llvm_unreachable("Unknown synchronization scope"); case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD; case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD; } + llvm_unreachable("Invalid synch scope"); } static void WriteStringRecord(unsigned Code, StringRef Str, @@ -172,10 +179,11 @@ static void WriteAttributeTable(const ValueEnumerator &VE, // Store the alignment in the bitcode as a 16-bit raw value instead of a // 5-bit log2 encoded value. Shift the bits above the alignment up by // 11 bits. - uint64_t FauxAttr = PAWI.Attrs & 0xffff; + uint64_t FauxAttr = PAWI.Attrs.Raw() & 0xffff; if (PAWI.Attrs & Attribute::Alignment) - FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16); - FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11; + FauxAttr |= (1ull<<16)<< + (((PAWI.Attrs & Attribute::Alignment).Raw()-1) >> 16); + FauxAttr |= (PAWI.Attrs.Raw() & (0x3FFull << 21)) << 11; Record.push_back(FauxAttr); } @@ -194,11 +202,12 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector TypeVals; + uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1); + // Abbrev for TYPE_CODE_POINTER. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv); @@ -206,10 +215,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg - Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_ANON. @@ -217,8 +225,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_NAME. @@ -233,16 +241,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv); // Emit an entry count so the reader can reserve space. 
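The NumBits hoisting in the WriteTypeTable hunk above does not change the encoding: each fixed abbreviation operand still gets Log2_32_Ceil(VE.getTypes().size()+1) bits, i.e. just enough to hold any type index up to the current type count. A minimal sketch of that width calculation follows (illustrative only, not part of the patch; the helper name is hypothetical, the in-tree code uses Log2_32_Ceil from llvm/Support/MathExtras.h):

// Sketch: number of bits needed to hold any value in [0, NumTypes].
// Produces the same result as Log2_32_Ceil(NumTypes + 1) for realistic sizes;
// the 32-bit shift edge case is ignored here.
static unsigned typeIndexBits(unsigned NumTypes) {   // hypothetical helper
  unsigned Bits = 0;
  while ((1u << Bits) < NumTypes + 1)                 // smallest power of two > NumTypes
    ++Bits;
  return Bits;                                        // e.g. 6 types -> 3 bits (values 0..6 fit)
}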
@@ -259,6 +267,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { switch (T->getTypeID()) { default: llvm_unreachable("Unknown type!"); case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; + case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; @@ -284,10 +293,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { } case Type::FunctionTyID: { FunctionType *FT = cast(T); - // FUNCTION: [isvararg, attrid, retty, paramty x N] + // FUNCTION: [isvararg, retty, paramty x N] Code = bitc::TYPE_CODE_FUNCTION; TypeVals.push_back(FT->isVarArg()); - TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0 TypeVals.push_back(VE.getTypeID(FT->getReturnType())); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); @@ -350,7 +358,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { - default: llvm_unreachable("Invalid linkage!"); case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 1; case GlobalValue::AppendingLinkage: return 2; @@ -368,15 +375,16 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::LinkerPrivateWeakLinkage: return 14; case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15; } + llvm_unreachable("Invalid linkage"); } static unsigned getEncodedVisibility(const GlobalValue *GV) { switch (GV->getVisibility()) { - default: llvm_unreachable("Invalid visibility!"); case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; } + llvm_unreachable("Invalid visibility"); } // Emit top-level description of module, including target triple, inline asm, @@ -499,8 +507,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the function proto information. for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { - // FUNCTION: [type, callingconv, isproto, paramattr, - // linkage, alignment, section, visibility, gc, unnamed_addr] + // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, + // section, visibility, gc, unnamed_addr] Vals.push_back(VE.getTypeID(F->getType())); Vals.push_back(F->getCallingConv()); Vals.push_back(F->isDeclaration()); @@ -520,6 +528,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the alias information. 
for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end(); AI != E; ++AI) { + // ALIAS: [alias type, aliasee val#, linkage, visibility] Vals.push_back(VE.getTypeID(AI->getType())); Vals.push_back(VE.getValueID(AI->getAliasee())); Vals.push_back(getEncodedLinkage(AI)); @@ -819,7 +828,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; Type *Ty = CFP->getType(); - if (Ty->isFloatTy() || Ty->isDoubleTy()) { + if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (Ty->isX86_FP80Ty()) { // api needed to prevent premature destruction @@ -836,34 +845,56 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } else { assert (0 && "Unknown FP type!"); } - } else if (isa(C) && cast(C)->isString()) { - const ConstantArray *CA = cast(C); + } else if (isa(C) && + cast(C)->isString()) { + const ConstantDataSequential *Str = cast(C); // Emit constant strings specially. - unsigned NumOps = CA->getNumOperands(); + unsigned NumElts = Str->getNumElements(); // If this is a null-terminated string, use the denser CSTRING encoding. - if (CA->getOperand(NumOps-1)->isNullValue()) { + if (Str->isCString()) { Code = bitc::CST_CODE_CSTRING; - --NumOps; // Don't encode the null, which isn't allowed by char6. + --NumElts; // Don't encode the null, which isn't allowed by char6. } else { Code = bitc::CST_CODE_STRING; AbbrevToUse = String8Abbrev; } bool isCStr7 = Code == bitc::CST_CODE_CSTRING; bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING; - for (unsigned i = 0; i != NumOps; ++i) { - unsigned char V = cast(CA->getOperand(i))->getZExtValue(); + for (unsigned i = 0; i != NumElts; ++i) { + unsigned char V = Str->getElementAsInteger(i); Record.push_back(V); isCStr7 &= (V & 128) == 0; if (isCStrChar6) isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } - + if (isCStrChar6) AbbrevToUse = CString6Abbrev; else if (isCStr7) AbbrevToUse = CString7Abbrev; - } else if (isa(C) || isa(V) || - isa(V)) { + } else if (const ConstantDataSequential *CDS = + dyn_cast(C)) { + Code = bitc::CST_CODE_DATA; + Type *EltTy = CDS->getType()->getElementType(); + if (isa(EltTy)) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) + Record.push_back(CDS->getElementAsInteger(i)); + } else if (EltTy->isFloatTy()) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { float F; uint32_t I; }; + F = CDS->getElementAsFloat(i); + Record.push_back(I); + } + } else { + assert(EltTy->isDoubleTy() && "Unknown ConstantData element type"); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { double F; uint64_t I; }; + F = CDS->getElementAsDouble(i); + Record.push_back(I); + } + } + } else if (isa(C) || isa(C) || + isa(C)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); @@ -1105,10 +1136,18 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, } break; case Instruction::Switch: - Code = bitc::FUNC_CODE_INST_SWITCH; - Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i))); + { + Code = bitc::FUNC_CODE_INST_SWITCH; + SwitchInst &SI = cast(I); + Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); + Vals.push_back(VE.getValueID(SI.getCondition())); + 
Vals.push_back(VE.getValueID(SI.getDefaultDest())); + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + Vals.push_back(VE.getValueID(i.getCaseValue())); + Vals.push_back(VE.getValueID(i.getCaseSuccessor())); + } + } break; case Instruction::IndirectBr: Code = bitc::FUNC_CODE_INST_INDIRECTBR; @@ -1146,9 +1185,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_RESUME; PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; - case Instruction::Unwind: - Code = bitc::FUNC_CODE_INST_UNWIND; - break; case Instruction::Unreachable: Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; @@ -1573,6 +1609,102 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } +// Sort the Users based on the order in which the reader parses the bitcode +// file. +static bool bitcodereader_order(const User *lhs, const User *rhs) { + // TODO: Implement. + return true; +} + +static void WriteUseList(const Value *V, const ValueEnumerator &VE, + BitstreamWriter &Stream) { + + // One or zero uses can't get out of order. + if (V->use_empty() || V->hasNUses(1)) + return; + + // Make a copy of the in-memory use-list for sorting. + unsigned UseListSize = std::distance(V->use_begin(), V->use_end()); + SmallVector UseList; + UseList.reserve(UseListSize); + for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); + I != E; ++I) { + const User *U = *I; + UseList.push_back(U); + } + + // Sort the copy based on the order read by the BitcodeReader. + std::sort(UseList.begin(), UseList.end(), bitcodereader_order); + + // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the + // sorted list (i.e., the expected BitcodeReader in-memory use-list). + + // TODO: Emit the USELIST_CODE_ENTRYs. +} + +static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE, + BitstreamWriter &Stream) { + VE.incorporateFunction(*F); + + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) + WriteUseList(AI, VE, Stream); + for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE; + ++BB) { + WriteUseList(BB, VE, Stream); + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { + WriteUseList(II, VE, Stream); + for (User::const_op_iterator OI = II->op_begin(), E = II->op_end(); + OI != E; ++OI) { + if ((isa(*OI) && !isa(*OI)) || + isa(*OI)) + WriteUseList(*OI, VE, Stream); + } + } + } + VE.purgeFunction(); +} + +// Emit use-lists. +static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); + + // XXX: this modifies the module, but in a way that should never change the + // behavior of any pass or codegen in LLVM. The problem is that GVs may + // contain entries in the use_list that do not exist in the Module and are + // not stored in the .bc file. + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) + I->removeDeadConstantUsers(); + + // Write the global variables. + for (Module::const_global_iterator GI = M->global_begin(), + GE = M->global_end(); GI != GE; ++GI) { + WriteUseList(GI, VE, Stream); + + // Write the global variable initializers. + if (GI->hasInitializer()) + WriteUseList(GI->getInitializer(), VE, Stream); + } + + // Write the functions. 
+ for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) { + WriteUseList(FI, VE, Stream); + if (!FI->isDeclaration()) + WriteFunctionUseList(FI, VE, Stream); + } + + // Write the aliases. + for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end(); + AI != AE; ++AI) { + WriteUseList(AI, VE, Stream); + WriteUseList(AI->getAliasee(), VE, Stream); + } + + Stream.ExitBlock(); +} /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream) { @@ -1607,17 +1739,21 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit metadata. WriteModuleMetadata(M, VE, Stream); - // Emit function bodies. - for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) - if (!F->isDeclaration()) - WriteFunction(*F, VE, Stream); - // Emit metadata. WriteModuleMetadataStore(M, Stream); // Emit names for globals/functions etc. WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); + // Emit use-lists. + if (EnablePreserveUseListOrdering) + WriteModuleUseLists(M, VE, Stream); + + // Emit function bodies. + for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) + if (!F->isDeclaration()) + WriteFunction(*F, VE, Stream); + Stream.ExitBlock(); } @@ -1639,7 +1775,17 @@ enum { DarwinBCHeaderSize = 5*4 }; -static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) { +static void WriteInt32ToBuffer(uint32_t Value, SmallVectorImpl &Buffer, + uint32_t &Position) { + Buffer[Position + 0] = (unsigned char) (Value >> 0); + Buffer[Position + 1] = (unsigned char) (Value >> 8); + Buffer[Position + 2] = (unsigned char) (Value >> 16); + Buffer[Position + 3] = (unsigned char) (Value >> 24); + Position += 4; +} + +static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, + const Triple &TT) { unsigned CPUType = ~0U; // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, @@ -1666,63 +1812,55 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) { CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. + assert(Buffer.size() >= DarwinBCHeaderSize && + "Expected header size to be reserved"); unsigned BCOffset = DarwinBCHeaderSize; + unsigned BCSize = Buffer.size()-DarwinBCHeaderSize; - Stream.Emit(0x0B17C0DE, 32); - Stream.Emit(0 , 32); // Version. - Stream.Emit(BCOffset , 32); - Stream.Emit(0 , 32); // Filled in later. - Stream.Emit(CPUType , 32); -} - -/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and -/// finalize the header. -static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) { - // Update the size field in the header. - Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize); + // Write the magic and version. + unsigned Position = 0; + WriteInt32ToBuffer(0x0B17C0DE , Buffer, Position); + WriteInt32ToBuffer(0 , Buffer, Position); // Version. + WriteInt32ToBuffer(BCOffset , Buffer, Position); + WriteInt32ToBuffer(BCSize , Buffer, Position); + WriteInt32ToBuffer(CPUType , Buffer, Position); // If the file is not a multiple of 16 bytes, insert dummy padding. - while (BufferSize & 15) { - Stream.Emit(0, 8); - ++BufferSize; - } + while (Buffer.size() & 15) + Buffer.push_back(0); } - /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. 
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { - std::vector Buffer; - BitstreamWriter Stream(Buffer); - + SmallVector Buffer; Buffer.reserve(256*1024); - WriteBitcodeToStream( M, Stream ); + // If this is darwin or another generic macho target, reserve space for the + // header. + Triple TT(M->getTargetTriple()); + if (TT.isOSDarwin()) + Buffer.insert(Buffer.begin(), DarwinBCHeaderSize, 0); + + // Emit the module into the buffer. + { + BitstreamWriter Stream(Buffer); + + // Emit the file header. + Stream.Emit((unsigned)'B', 8); + Stream.Emit((unsigned)'C', 8); + Stream.Emit(0x0, 4); + Stream.Emit(0xC, 4); + Stream.Emit(0xE, 4); + Stream.Emit(0xD, 4); + + // Emit the module. + WriteModule(M, Stream); + } + + if (TT.isOSDarwin()) + EmitDarwinBCHeaderAndTrailer(Buffer, TT); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } - -/// WriteBitcodeToStream - Write the specified module to the specified output -/// stream. -void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) { - // If this is darwin or another generic macho target, emit a file header and - // trailer if needed. - Triple TT(M->getTargetTriple()); - if (TT.isOSDarwin()) - EmitDarwinBCHeader(Stream, TT); - - // Emit the file header. - Stream.Emit((unsigned)'B', 8); - Stream.Emit((unsigned)'C', 8); - Stream.Emit(0x0, 4); - Stream.Emit(0xC, 4); - Stream.Emit(0xE, 4); - Stream.Emit(0xD, 4); - - // Emit the module. - WriteModule(M, Stream); - - if (TT.isOSDarwin()) - EmitDarwinBCTrailer(Stream, Stream.getBuffer().size()); -} diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 9ae9905b9f1d..1ed9004eb5a1 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -19,6 +19,8 @@ #include "llvm/Module.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -107,7 +109,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) { OptimizeConstants(FirstConstant, Values.size()); } - unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { InstructionMapType::const_iterator I = InstructionMap.find(Inst); assert(I != InstructionMap.end() && "Instruction is not mapped!"); @@ -130,6 +131,43 @@ unsigned ValueEnumerator::getValueID(const Value *V) const { return I->second-1; } +void ValueEnumerator::dump() const { + print(dbgs(), ValueMap, "Default"); + dbgs() << '\n'; + print(dbgs(), MDValueMap, "MetaData"); + dbgs() << '\n'; +} + +void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, + const char *Name) const { + + OS << "Map Name: " << Name << "\n"; + OS << "Size: " << Map.size() << "\n"; + for (ValueMapType::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + + const Value *V = I->first; + if (V->hasName()) + OS << "Value: " << V->getName(); + else + OS << "Value: [null]\n"; + V->dump(); + + OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):"; + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + if (UI != V->use_begin()) + OS << ","; + if((*UI)->hasName()) + OS << " " << (*UI)->getName(); + else + OS << " [null]"; + + } + OS << "\n\n"; + } +} + // Optimize constant ordering. 
namespace { struct CstSortPredicate { @@ -283,10 +321,6 @@ void ValueEnumerator::EnumerateValue(const Value *V) { if (const Constant *C = dyn_cast(V)) { if (isa(C)) { // Initializers for globals are handled explicitly elsewhere. - } else if (isa(C) && cast(C)->isString()) { - // Do not enumerate the initializers for an array of simple characters. - // The initializers just pollute the value table, and we emit the strings - // specially. } else if (C->getNumOperands()) { // If a constant has operands, enumerate them. This makes sure that if a // constant has uses (for example an array of const ints), that they are diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h index b6fc920e412b..a6ca53606248 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -32,6 +32,7 @@ class NamedMDNode; class AttrListPtr; class ValueSymbolTable; class MDSymbolTable; +class raw_ostream; class ValueEnumerator { public: @@ -83,6 +84,9 @@ class ValueEnumerator { public: ValueEnumerator(const Module *M); + void dump() const; + void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; + unsigned getValueID(const Value *V) const; unsigned getTypeID(Type *T) const { diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 25842a7876a2..822a564441ac 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(State == NULL); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); std::vector &KillIndices = State->GetKillIndices(); std::vector &DefIndices = State->GetDefIndices(); @@ -157,7 +157,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - for (const unsigned *Alias = TRI->getOverlaps(*I); + for (const uint16_t *Alias = TRI->getOverlaps(*I); unsigned Reg = *Alias; ++Alias) { State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -173,7 +173,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - for (const unsigned *Alias = TRI->getOverlaps(*I); + for (const uint16_t *Alias = TRI->getOverlaps(*I); unsigned Reg = *Alias; ++Alias) { State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -186,10 +186,10 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. 
const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; - for (const unsigned *Alias = TRI->getOverlaps(Reg); + for (const uint16_t *Alias = TRI->getOverlaps(Reg); unsigned AliasReg = *Alias; ++Alias) { State->UnionGroups(AliasReg, 0); KillIndices[AliasReg] = BB->size(); @@ -265,7 +265,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); PassthruRegs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { PassthruRegs.insert(*Subreg); } @@ -333,7 +333,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { unsigned SubregReg = *Subreg; if (!State->IsLive(SubregReg)) { @@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); @@ -392,7 +392,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Any aliased that are live at this point are completely or // partially defined here, so group those aliases with Reg. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); @@ -423,7 +423,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - for (const unsigned *Alias = TRI->getOverlaps(Reg); + for (const uint16_t *Alias = TRI->getOverlaps(Reg); unsigned AliasReg = *Alias; ++Alias) DefIndices[AliasReg] = Count; } @@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. 
- bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register uses for this instruction and update @@ -678,7 +678,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( goto next_super_reg; } else { bool found = false; - for (const unsigned *Alias = TRI->getAliasSet(NewReg); + for (const uint16_t *Alias = TRI->getAliasSet(NewReg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (State->IsLive(AliasReg) || @@ -780,6 +780,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + DEBUG(dbgs() << "Anti: "); DEBUG(MI->dump()); diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 1005f102bea6..87f64311a655 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -41,7 +41,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg, if (HintPair.first) { const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); // The remaining allocation order may depend on the hint. - ArrayRef Order = + ArrayRef Order = TRI.getRawAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); if (Order.empty()) diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h index d1e48a1f2e96..0ce7e0c3b5f6 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.h +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -34,8 +34,7 @@ class AllocationOrder { /// AllocationOrder - Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. - /// @param ReservedRegs Set of reserved registers as returned by - /// TargetRegisterInfo::getReservedRegs(). + /// @param RegClassInfo Information about reserved and allocatable registers. AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo); diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index fafc01044d4f..00874d411378 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -1,4 +1,4 @@ -//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -149,33 +150,37 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, /// consideration of global floating-point math flags. 
/// ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { - ISD::CondCode FPC, FOC; switch (Pred) { - case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; - case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; - case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; - case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; - case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; - case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; - case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; - case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; - case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; - case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; - case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; - case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; - case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; - case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; - case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; - case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; - default: - llvm_unreachable("Invalid FCmp predicate opcode!"); - FOC = FPC = ISD::SETFALSE; - break; + case FCmpInst::FCMP_FALSE: return ISD::SETFALSE; + case FCmpInst::FCMP_OEQ: return ISD::SETOEQ; + case FCmpInst::FCMP_OGT: return ISD::SETOGT; + case FCmpInst::FCMP_OGE: return ISD::SETOGE; + case FCmpInst::FCMP_OLT: return ISD::SETOLT; + case FCmpInst::FCMP_OLE: return ISD::SETOLE; + case FCmpInst::FCMP_ONE: return ISD::SETONE; + case FCmpInst::FCMP_ORD: return ISD::SETO; + case FCmpInst::FCMP_UNO: return ISD::SETUO; + case FCmpInst::FCMP_UEQ: return ISD::SETUEQ; + case FCmpInst::FCMP_UGT: return ISD::SETUGT; + case FCmpInst::FCMP_UGE: return ISD::SETUGE; + case FCmpInst::FCMP_ULT: return ISD::SETULT; + case FCmpInst::FCMP_ULE: return ISD::SETULE; + case FCmpInst::FCMP_UNE: return ISD::SETUNE; + case FCmpInst::FCMP_TRUE: return ISD::SETTRUE; + default: llvm_unreachable("Invalid FCmp predicate opcode!"); + } +} + +ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ; + case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE; + case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT; + case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE; + case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT; + case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE; + default: return CC; } - if (NoNaNsFPMath) - return FOC; - else - return FPC; } /// getICmpCondCode - Return the ISD condition code corresponding to @@ -195,7 +200,6 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: llvm_unreachable("Invalid ICmp predicate opcode!"); - return ISD::SETNE; } } @@ -221,12 +225,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!GuaranteedTailCallOpt || !isa(Term))) return false; + (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || + !isa(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. 
if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(I)) for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; --BBI) { if (&*BBI == I) @@ -235,7 +240,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, if (isa(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(BBI)) return false; } @@ -250,7 +255,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; @@ -285,12 +290,12 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, } bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - const TargetLowering &TLI) { + SDValue &Chain, const TargetLowering &TLI) { const Function *F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if (CallerRetAttr & ~Attribute::NoAlias) return false; @@ -299,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return false; // Check if the only use is a function return node. - return TLI.isUsedByReturnOnly(Node); + return TLI.isUsedByReturnOnly(Node, Chain); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 3f2387325360..b60fda86a6ba 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -29,6 +29,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" @@ -36,6 +37,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; +cl::opt +EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, + cl::desc("Generate ARM EHABI tables with unwinding descriptors"), + cl::init(false)); + + ARMException::ARMException(AsmPrinter *A) : DwarfException(A), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) @@ -72,13 +79,15 @@ void ARMException::EndFunction() { Asm->OutStreamer.EmitPersonality(PerSym); } - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + if (EnableARMEHABIDescriptors) { + // Map all labels and get rid of any dead landing pads. 
+ MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); + // Emit actual exception table + EmitExceptionTable(); + } } Asm->OutStreamer.EmitFnEnd(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1999f3608788..b0b2ff4882af 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -100,6 +100,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { DD = 0; DE = 0; MMI = 0; LI = 0; + CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); } @@ -613,6 +614,10 @@ bool AsmPrinter::needsSEHMoves() { MF->getFunction()->needsUnwindTableEntry(); } +bool AsmPrinter::needsRelocationsForDwarfStringPool() const { + return MAI->doesDwarfUseRelocationsForStringPool(); +} + void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { MCSymbol *Label = MI.getOperand(0).getMCSymbol(); @@ -732,6 +737,18 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitRawText(StringRef("\tnop\n")); } + const Function *F = MF->getFunction(); + for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { + const BasicBlock *BB = i; + if (!BB->hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(BB); + if (Sym->isDefined()) + continue; + OutStreamer.AddComment("Address of block that was removed by CodeGen"); + OutStreamer.EmitLabel(Sym); + } + // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); @@ -745,7 +762,8 @@ void AsmPrinter::EmitFunctionBody() { const MCExpr *SizeExp = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), - MCSymbolRefExpr::Create(CurrentFnSym, OutContext), + MCSymbolRefExpr::Create(CurrentFnSymForSize, + OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } @@ -780,7 +798,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg()); + for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg()); *SR && Reg < 0; ++SR) { Reg = TRI->getDwarfRegNum(*SR, false); // FIXME: Get the bit range this register uses of the superregister @@ -841,6 +859,12 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + // Emit module flags. + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + if (!ModuleFlags.empty()) + getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + // Finalize debug and EH information. if (DE) { { @@ -929,6 +953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. 
CurrentFnSym = Mang->getSymbol(MF.getFunction()); + CurrentFnSymForSize = CurrentFnSym; if (isVerbose()) LI = &getAnalysis(); @@ -1120,7 +1145,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MCExpr *Value = 0; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: - llvm_unreachable("Cannot emit EK_Inline jump table entry"); break; + llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, OutContext); @@ -1139,6 +1164,15 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, return; } + case MachineJumpTableInfo::EK_GPRel64BlockAddress: { + // EK_GPRel64BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gpdword LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; + } + case MachineJumpTableInfo::EK_LabelDifference32: { // EK_LabelDifference32 - Each entry is the address of the block minus // the address of the jump table. This is used for PIC jump tables where @@ -1191,12 +1225,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); - const TargetData *TD = TM.getTargetData(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1208,9 +1238,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } if (GV->getName() == "llvm.global_dtors") { - OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align); - EmitXXStructorList(GV->getInitializer()); + EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1240,7 +1268,7 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) { } } -typedef std::pair Structor; +typedef std::pair Structor; static bool priority_order(const Structor& lhs, const Structor& rhs) { return lhs.first < rhs.first; @@ -1248,7 +1276,7 @@ static bool priority_order(const Structor& lhs, const Structor& rhs) { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List) { +void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. if (!isa(List)) return; @@ -1274,19 +1302,20 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) { CS->getOperand(1))); } - // Emit the function pointers in reverse priority order. 
- switch (MAI->getStructorOutputOrder()) { - case Structors::None: - break; - case Structors::PriorityOrder: - std::sort(Structors.begin(), Structors.end(), priority_order); - break; - case Structors::ReversePriorityOrder: - std::sort(Structors.rbegin(), Structors.rend(), priority_order); - break; + // Emit the function pointers in the target-specific order + const TargetData *TD = TM.getTargetData(); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), priority_order); + for (unsigned i = 0, e = Structors.size(); i != e; ++i) { + const MCSection *OutputSection = + (isCtor ? + getObjFileLowering().getStaticCtorSection(Structors[i].first) : + getObjFileLowering().getStaticDtorSection(Structors[i].first)); + OutStreamer.SwitchSection(OutputSection); + if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) + EmitAlignment(Align); + EmitXXStructor(Structors[i].second); } - for (unsigned i = 0, e = Structors.size(); i != e; ++i) - EmitGlobalConstant(Structors[i].second); } //===--------------------------------------------------------------------===// @@ -1423,7 +1452,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { const ConstantExpr *CE = dyn_cast(CV); if (CE == 0) { llvm_unreachable("Unknown constant value to lower!"); - return MCConstantExpr::Create(0, Ctx); } switch (CE->getOpcode()) { @@ -1445,7 +1473,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { !AP.MF ? 0 : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } - return MCConstantExpr::Create(0, Ctx); case Instruction::GetElementPtr: { const TargetData &TD = *AP.TM.getTargetData(); // Generate a symbolic expression for the byte address @@ -1540,6 +1567,19 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, AsmPrinter &AP); +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return static_cast(C); // Ensure 255 is not returned as -1. +} + + /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. @@ -1568,8 +1608,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantArray *CA = dyn_cast(V)) { // Make sure all array elements are sequences of the same repeated // byte. - if (CA->getNumOperands() == 0) return -1; - + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); int Byte = isRepeatedByteSequence(CA->getOperand(0), TM); if (Byte == -1) return -1; @@ -1580,37 +1619,92 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } return Byte; } + + if (const ConstantDataSequential *CDS = dyn_cast(V)) + return isRepeatedByteSequence(CDS); return -1; } -static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { - if (AddrSpace != 0 || !CA->isString()) { - // Not a string. Print the values in successive locations. 
+static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS, + unsigned AddrSpace,AsmPrinter &AP){ + + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, AP.TM); + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType()); + // Don't emit a 1-byte object as a .fill. + if (Bytes > 1) + return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); - // See if we can aggregate some values. Make sure it can be - // represented as a series of bytes of the constant value. - int Value = isRepeatedByteSequence(CA, AP.TM); - - if (Value != -1) { - uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + // Otherwise, emit the values in successive locations. + unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize, AddrSpace); } - else { - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } else if (ElementByteSize == 4) { + // FP Constants are printed as integer constants to avoid losing + // precision. + assert(CDS->getElementType()->isFloatTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + float F; + uint32_t I; + }; + + F = CDS->getElementAsFloat(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + } + } else { + assert(CDS->getElementType()->isDoubleTy()); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { + double F; + uint64_t I; + }; + + F = CDS->getElementAsDouble(i); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; + AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); } - return; } - // Otherwise, it can be emitted as .ascii. - SmallVector TmpVec; - TmpVec.reserve(CA->getNumOperands()); - for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - TmpVec.push_back(cast(CA->getOperand(i))->getZExtValue()); + const TargetData &TD = *AP.TM.getTargetData(); + unsigned Size = TD.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + CDS->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer.EmitZeros(Padding, AddrSpace); - AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace); +} + +static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, + AsmPrinter &AP) { + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. 
+ int Value = isRepeatedByteSequence(CA, AP.TM); + + if (Value != -1) { + uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType()); + AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + } } static void EmitGlobalConstantVector(const ConstantVector *CV, @@ -1656,29 +1750,44 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { - // FP Constants are printed as integer constants to avoid losing - // precision. - if (CFP->getType()->isDoubleTy()) { + if (CFP->getType()->isHalfTy()) { if (AP.isVerbose()) { - double Val = CFP->getValueAPF().convertToDouble(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'; + SmallString<10> Str; + CFP->getValueAPF().toString(Str); + AP.OutStreamer.GetCommentOS() << "half " << Str << '\n'; } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace); return; } if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' + << " (" << format("0x%x", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); return; } + // FP Constants are printed as integer constants to avoid losing + // precision. + if (CFP->getType()->isDoubleTy()) { + if (AP.isVerbose()) { + double Val = CFP->getValueAPF().convertToDouble(); + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' + << " (" << format("0x%lx", IntVal) << ")\n"; + } + + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + return; + } + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // API needed to prevent premature destruction @@ -1742,20 +1851,20 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - if (isa(CV) || isa(CV)) { - uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + const TargetData *TD = AP.TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(CV->getType()); + if (isa(CV) || isa(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); - } if (const ConstantInt *CI = dyn_cast(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: case 4: case 8: if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: @@ -1764,29 +1873,45 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, } } + if (const ConstantFP *CFP = dyn_cast(CV)) + return EmitGlobalConstantFP(CFP, AddrSpace, AP); + + if (isa(CV)) { + AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); + return; + } + + if (const ConstantDataSequential *CDS = dyn_cast(CV)) + return 
EmitGlobalConstantDataSequential(CDS, AddrSpace, AP); + if (const ConstantArray *CVA = dyn_cast(CV)) return EmitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast(CV)) return EmitGlobalConstantStruct(CVS, AddrSpace, AP); - if (const ConstantFP *CFP = dyn_cast(CV)) - return EmitGlobalConstantFP(CFP, AddrSpace, AP); + if (const ConstantExpr *CE = dyn_cast(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). + if (CE->getOpcode() == Instruction::BitCast) + return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); - if (isa(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); - AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); - return; + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + return EmitGlobalConstantImpl(New, AddrSpace, AP); + } } - + if (const ConstantVector *V = dyn_cast(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), - AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. @@ -1953,7 +2078,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB->getAlignment()) - EmitAlignment(Log2_32(Align)); + EmitAlignment(Align); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label @@ -1970,27 +2095,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { OutStreamer.EmitLabel(Syms[i]); } + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) + OutStreamer.AddComment("%" + BB->getName()); + EmitBasicBlockLoopComments(*MBB, LI, *this); + } + // Print the main label for the block. if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (isVerbose() && OutStreamer.hasRawTextSupport()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - - EmitBasicBlockLoopComments(*MBB, LI, *this); - // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); } } else { - if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); - } - OutStreamer.EmitLabel(MBB->getSymbol()); } } @@ -2048,7 +2168,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { MachineInstr &MI = *II; // If it is not a simple branch, we are in a table somewhere. 
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch()) + if (!MI.isBranch() || MI.isIndirectBranch()) return false; // If we are the operands of one of the branches, this is not @@ -2090,6 +2210,4 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { } report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); - return 0; } - diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 4d6c28118427..90d511cbab0a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -25,6 +25,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -35,23 +36,8 @@ using namespace llvm; void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - - if (MAI->hasLEB128()) { - OutStreamer.EmitSLEB128IntValue(Value); - return; - } - // If we don't have .sleb128, emit as .bytes. - int Sign = Value >> (8 * sizeof(Value) - 1); - bool IsMore; - - do { - unsigned char Byte = static_cast(Value & 0x7f); - Value >>= 7; - IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; - if (IsMore) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (IsMore); + OutStreamer.EmitSLEB128IntValue(Value); } /// EmitULEB128 - emit the specified signed leb128 value. @@ -60,25 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - // FIXME: Should we add a PadTo option to the streamer? - if (MAI->hasLEB128() && PadTo == 0) { - OutStreamer.EmitULEB128IntValue(Value); - return; - } - - // If we don't have .uleb128 or we want to emit padding, emit as .bytes. - do { - unsigned char Byte = static_cast(Value & 0x7f); - Value >>= 7; - if (Value || PadTo != 0) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (Value); - - if (PadTo) { - if (PadTo > 1) - OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/); - OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/); - } + OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. @@ -143,7 +111,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { return 0; switch (Encoding & 0x07) { - default: assert(0 && "Invalid encoded value."); + default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; @@ -177,9 +145,8 @@ void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, const MCSymbol *SectionLabel) const { // On COFF targets, we have to emit the special .secrel32 directive. - if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) { - // FIXME: MCize. 
- OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName())); + if (MAI->getDwarfSectionOffsetDirective()) { + OutStreamer.EmitCOFFSecRel32(Label); return; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 8eda889155a2..d60585465be0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -326,7 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } - if (OpNo >= MI->getNumOperands()) { + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 9c1ce761b0c5..3776848e3f47 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,15 +112,6 @@ DIE::~DIE() { delete Children[i]; } -/// addSiblingOffset - Add a sibling offset field to the front of the DIE. -/// -DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) { - DIEInteger *DI = new (A) DIEInteger(0); - Values.insert(Values.begin(), DI); - Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); - return DI; -} - #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; @@ -174,6 +165,7 @@ void DIE::dump() { } #endif +void DIEValue::anchor() { } #ifndef NDEBUG void DIEValue::dump() { @@ -223,33 +215,14 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); - default: llvm_unreachable("DIE Value form not supported yet"); break; + default: llvm_unreachable("DIE Value form not supported yet"); } - return 0; } #ifndef NDEBUG void DIEInteger::print(raw_ostream &O) { - O << "Int: " << (int64_t)Integer - << format(" 0x%llx", (unsigned long long)Integer); -} -#endif - -//===----------------------------------------------------------------------===// -// DIEString Implementation -//===----------------------------------------------------------------------===// - -/// EmitValue - Emit string value. -/// -void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitBytes(Str, /*addrspace*/0); - // Emit nul terminator. 
- AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); -} - -#ifndef NDEBUG -void DIEString::print(raw_ostream &O) { - O << "Str: \"" << Str << "\""; + O << "Int: " << (int64_t)Integer << " 0x"; + O.write_hex(Integer); } #endif @@ -267,6 +240,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -290,6 +264,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -335,7 +310,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { /// void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { switch (Form) { - default: assert(0 && "Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; @@ -355,9 +330,8 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); - default: llvm_unreachable("Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); } - return 0; } #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index 7d61f1edff4a..f93ea1b045b2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -31,17 +31,17 @@ namespace llvm { class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// - unsigned Attribute; + uint16_t Attribute; /// Form - Dwarf form code. /// - unsigned Form; + uint16_t Form; public: - DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {} + DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {} // Accessors. - unsigned getAttribute() const { return Attribute; } - unsigned getForm() const { return Form; } + uint16_t getAttribute() const { return Attribute; } + uint16_t getForm() const { return Form; } /// Profile - Used to gather unique data for the abbreviation folding set. /// @@ -54,41 +54,41 @@ namespace llvm { class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// - unsigned Tag; + uint16_t Tag; + + /// ChildrenFlag - Dwarf children flag. + /// + uint16_t ChildrenFlag; /// Unique number for node. /// unsigned Number; - /// ChildrenFlag - Dwarf children flag. - /// - unsigned ChildrenFlag; - /// Data - Raw data bytes for abbreviation. /// SmallVector Data; public: - DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} + DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} // Accessors. 
- unsigned getTag() const { return Tag; } + uint16_t getTag() const { return Tag; } unsigned getNumber() const { return Number; } - unsigned getChildrenFlag() const { return ChildrenFlag; } + uint16_t getChildrenFlag() const { return ChildrenFlag; } const SmallVector &getData() const { return Data; } - void setTag(unsigned T) { Tag = T; } - void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; } + void setTag(uint16_t T) { Tag = T; } + void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } /// AddAttribute - Adds another set of attribute information to the /// abbreviation. - void AddAttribute(unsigned Attribute, unsigned Form) { + void AddAttribute(uint16_t Attribute, uint16_t Form) { Data.push_back(DIEAbbrevData(Attribute, Form)); } /// AddFirstAttribute - Adds a set of attribute information to the front /// of the abbreviation. - void AddFirstAttribute(unsigned Attribute, unsigned Form) { + void AddFirstAttribute(uint16_t Attribute, uint16_t Form) { Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form)); } @@ -113,10 +113,6 @@ namespace llvm { class DIE { protected: - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - /// Offset - Offset in debug info section. /// unsigned Offset; @@ -125,6 +121,10 @@ namespace llvm { /// unsigned Size; + /// Abbrev - Buffer for constructing abbreviation. + /// + DIEAbbrev Abbrev; + /// Children DIEs. /// std::vector Children; @@ -139,8 +139,8 @@ namespace llvm { mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), - Size(0), Parent (0), IndentCount(0) {} + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), + IndentCount(0) {} virtual ~DIE(); // Accessors. @@ -163,16 +163,6 @@ namespace llvm { Values.push_back(Value); } - /// SiblingOffset - Return the offset of the debug information entry's - /// sibling. - unsigned getSiblingOffset() const { return Offset + Size; } - - /// addSiblingOffset - Add a sibling offset field to the front of the DIE. - /// The caller is responsible for deleting the return value at or after the - /// same time it destroys this DIE. - /// - DIEValue *addSiblingOffset(BumpPtrAllocator &A); - /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { @@ -195,12 +185,12 @@ namespace llvm { /// DIEValue - A debug information entry value. /// class DIEValue { + virtual void anchor(); public: enum { isInteger, isString, isLabel, - isSectionOffset, isDelta, isEntry, isBlock @@ -270,33 +260,6 @@ namespace llvm { static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } -#ifndef NDEBUG - virtual void print(raw_ostream &O); -#endif - }; - - //===--------------------------------------------------------------------===// - /// DIEString - A string value DIE. This DIE keeps string reference only. - /// - class DIEString : public DIEValue { - const StringRef Str; - public: - explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} - - /// EmitValue - Emit string value. - /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; - - /// SizeOf - Determine size of string value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const { - return Str.size() + sizeof(char); // sizeof('\0'); - } - - // Implement isa/cast/dyncast. 
- static bool classof(const DIEString *) { return true; } - static bool classof(const DIEValue *S) { return S->getType() == isString; } - #ifndef NDEBUG virtual void print(raw_ostream &O); #endif @@ -359,7 +322,7 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEntry - A pointer to another debug information entry. An instance of + /// DIEEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) class DIEEntry : public DIEValue { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 000000000000..660684d1bea5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,287 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { + switch (AT) { + case eAtomTypeNULL: return "eAtomTypeNULL"; + case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; + case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; + case eAtomTypeTag: return "eAtomTypeTag"; + case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; + case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; + } + llvm_unreachable("invalid AtomType!"); +} + +// The general case would need to have a less hard coded size for the +// length of the HeaderData, however, if we're constructing based on a +// single Atom then we know it will always be: 4 + 4 + 2 + 2. +DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : + Header(12), + HeaderData(atom) { +} + +// The length of the header data is always going to be 4 + 4 + 4*NumAtoms. +DwarfAccelTable::DwarfAccelTable(std::vector &atomList) : + Header(8 + (atomList.size() * 4)), + HeaderData(atomList) { +} + +DwarfAccelTable::~DwarfAccelTable() { + for (size_t i = 0, e = Data.size(); i < e; ++i) + delete Data[i]; + for (StringMap::iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) + for (DataArray::iterator DI = EI->second.begin(), + DE = EI->second.end(); DI != DE; ++DI) + delete (*DI); +} + +void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { + // If the string is in the list already then add this die to the list + // otherwise add a new one. + DataArray &DIEs = Entries[Name]; + DIEs.push_back(new HashDataContents(die, Flags)); +} + +void DwarfAccelTable::ComputeBucketCount(void) { + // First get the number of unique hashes. 
+ std::vector uniques(Data.size()); + for (size_t i = 0, e = Data.size(); i < e; ++i) + uniques[i] = Data[i]->HashValue; + array_pod_sort(uniques.begin(), uniques.end()); + std::vector::iterator p = + std::unique(uniques.begin(), uniques.end()); + uint32_t num = std::distance(uniques.begin(), p); + + // Then compute the bucket size, minimum of 1 bucket. + if (num > 1024) Header.bucket_count = num/4; + if (num > 16) Header.bucket_count = num/2; + else Header.bucket_count = num > 0 ? num : 1; + + Header.hashes_count = num; +} + +namespace { + // DIESorter - comparison predicate that sorts DIEs by their offset. + struct DIESorter { + bool operator()(const struct DwarfAccelTable::HashDataContents *A, + const struct DwarfAccelTable::HashDataContents *B) const { + return A->Die->getOffset() < B->Die->getOffset(); + } + }; +} + +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { + // Create the individual hash data outputs. + for (StringMap::iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + struct HashData *Entry = new HashData((*EI).getKeyData()); + + // Unique the entries. + std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter()); + EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), + EI->second.end()); + + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + Entry->addData((*DI)); + Data.push_back(Entry); + } + + // Figure out how many buckets we need, then compute the bucket + // contents and the final ordering. We'll emit the hashes and offsets + // by doing a walk during the emission phase. We add temporary + // symbols to the data so that we can reference them during the offset + // later, we'll emit them when we emit the data. + ComputeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(Header.bucket_count); + for (size_t i = 0, e = Data.size(); i < e; ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + } +} + +// Emits the header for the table via the AsmPrinter. +void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer.AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer.AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer.AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. This will look +// like a list of numbers of how many elements are in each bucket. 
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer.AddComment("Bucket " + Twine(i)); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + index += Buckets[i].size(); + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. +void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32((*HI)->HashValue); + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. +void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer.getContext(); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), + Context); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + // Remember to emit the label for our offset. + Asm->OutStreamer.EmitLabel((*HI)->Sym); + Asm->OutStreamer.AddComment((*HI)->Str); + Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), + D->getStringPool()); + Asm->OutStreamer.AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->Data.size()); + for (std::vector::const_iterator + DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); + DI != DE; ++DI) { + // Emit the DIE offset + Asm->EmitInt32((*DI)->Die->getOffset()); + // If we have multiple Atoms emit that info too. + // FIXME: A bit of a hack, we either emit only one atom or all info. + if (HeaderData.Atoms.size() > 1) { + Asm->EmitInt16((*DI)->Die->getTag()); + Asm->EmitInt8((*DI)->Flags); + } + } + // Emit a 0 to terminate the data unless we have a hash collision. + if (PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + PrevHash = (*HI)->HashValue; + } + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, + DwarfDebug *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + EmitOffsets(Asm, SecBegin); + + // Emit the hash data. 
+ EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + O << "Name: " << EI->getKeyData() << "\n"; + for (DataArray::const_iterator DI = EI->second.begin(), + DE = EI->second.end(); + DI != DE; ++DI) + (*DI)->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector::const_iterator + DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) + (*DI)->print(O); + + +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 000000000000..2278d4c784f4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,290 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ + +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "DIE.h" +#include +#include + +// The dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. +// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. +// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. 
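(Illustrative aside, not part of the imported sources: a minimal C++ sketch of the reader-side lookup that the comment above describes, assuming the table has already been parsed into its bucket count, hash count, and the buckets/hashes/offsets arrays, and using the same DJB hash that DwarfAccelTable::HashDJB defines below. The helper names here are hypothetical.)

#include <cstdint>
#include <string>

// DJB hash, mirroring DwarfAccelTable::HashDJB below.
static uint32_t HashDJB(const std::string &Str) {
  uint32_t h = 5381;
  for (unsigned i = 0, e = Str.size(); i != e; ++i)
    h = ((h << 5) + h) + (unsigned char)Str[i];   // h * 33 + Str[i]
  return h;
}

// Hypothetical reader: return the offset (into the DATA area) of the hash
// data for Name, or UINT32_MAX if the name is not present in the table.
static uint32_t LookupOffset(const std::string &Name,
                             uint32_t BucketCount, uint32_t HashCount,
                             const uint32_t *Buckets,   // BucketCount entries
                             const uint32_t *Hashes,    // HashCount entries
                             const uint32_t *Offsets) { // HashCount entries
  uint32_t Hash = HashDJB(Name);
  uint32_t Bucket = Hash % BucketCount;
  uint32_t Idx = Buckets[Bucket];
  if (Idx == UINT32_MAX)                          // empty bucket
    return UINT32_MAX;
  // Walk successive hashes as long as they still map to this bucket.
  for (; Idx < HashCount && Hashes[Idx] % BucketCount == Bucket; ++Idx)
    if (Hashes[Idx] == Hash)
      return Offsets[Idx];
  return UINT32_MAX;
}

(Since different strings can share a 32-bit hash, a real consumer would still compare the string found at the returned data offset against Name before trusting the match; the sketch only locates the candidate HashData entry.)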
+ +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; + +class DwarfAccelTable { + + enum HashFunctionType { + eHashFunctionDJB = 0u + }; + + static uint32_t HashDJB (StringRef Str) { + uint32_t h = 5381; + for (unsigned i = 0, e = Str.size(); i != e; ++i) + h = ((h << 5) + h) + Str[i]; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount (void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader (uint32_t data_len) : + magic (MagicHash), version (1), hash_function (eHashFunctionDJB), + bucket_count (0), hashes_count (0), header_data_len (data_len) + {} + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. + // When written to disk this will have the form: + // + // uint32_t die_offset_base + // uint32_t atom_count + // atom_count Atoms + enum AtomType { + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that + // contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as + // DW_FORM_data1 (if no tags exceed 255) or + // DW_FORM_data2. + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags + }; + + enum TypeFlags { + eTypeFlagClassMask = 0x0000000fu, + + // Always set for C++, only set for ObjC if this is the + // @implementation for a class. + eTypeFlagClassIsImplementation = ( 1u << 1 ) + }; + + // Make these public so that they can be used as a general interface to + // the class. 
+ struct Atom { + AtomType type; // enum AtomType + uint16_t form; // DWARF DW_FORM_ defines + + Atom(AtomType type, uint16_t form) : type(type), form(form) {} + static const char * AtomTypeString(enum AtomType); +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Type: " << AtomTypeString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; + } + void dump() { + print(dbgs()); + } +#endif + }; + + private: + struct TableHeaderData { + + uint32_t die_offset_base; + std::vector Atoms; + + TableHeaderData(std::vector &AtomList, + uint32_t offset = 0) : + die_offset_base(offset) { + for (size_t i = 0, e = AtomList.size(); i != e; ++i) + Atoms.push_back(AtomList[i]); + } + + TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) + : die_offset_base(offset) { + Atoms.push_back(Atom); + } + +#ifndef NDEBUG + void print (raw_ostream &O) { + O << "die_offset_base: " << die_offset_base << "\n"; + for (size_t i = 0; i < Atoms.size(); i++) + Atoms[i].print(O); + } + void dump() { + print(dbgs()); + } +#endif + }; + + // The data itself consists of a str_offset, a count of the DIEs in the + // hash and the offsets to the DIEs themselves. + // On disk each data section is ended with a 0 KeyType as the end of the + // hash chain. + // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] +public: + struct HashDataContents { + DIE *Die; // Offsets + char Flags; // Specific flags to output + + HashDataContents(DIE *D, char Flags) : + Die(D), + Flags(Flags) { } + #ifndef NDEBUG + void print(raw_ostream &O) const { + O << " Offset: " << Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; + O << " Flags: " << Flags << "\n"; + } + #endif + }; +private: + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + std::vector Data; // offsets + HashData(StringRef S) : Str(S) { + HashValue = DwarfAccelTable::HashDJB(S); + } + void addData(struct HashDataContents *Datum) { Data.push_back(Datum); } + #ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: " ; + if (Sym) Sym->print(O); + else O << ""; + O << "\n"; + for (size_t i = 0; i < Data.size(); i++) { + O << " Offset: " << Data[i]->Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n"; + O << " Flags: " << Data[i]->Flags << "\n"; + } + } + void dump() { + print(dbgs()); + } + #endif + }; + + DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT + void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void EmitOffsets(AsmPrinter *, MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector Data; + + // String Data + typedef std::vector DataArray; + typedef StringMap StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector HashList; + typedef std::vector BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation + public: + DwarfAccelTable(DwarfAccelTable::Atom); + DwarfAccelTable(std::vector &); + ~DwarfAccelTable(); + void AddName(StringRef, DIE*, char = 0); + void FinalizeTable(AsmPrinter *, const char *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); +#ifndef NDEBUG + void print(raw_ostream 
&O); + void dump() { print(dbgs()); } +#endif +}; + +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 8ed4f4c43a7c..d975f1f97bea 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -142,12 +142,14 @@ void DwarfCFIException::EndFunction() { Asm->OutStreamer.EmitCFIEndProc(); + if (!shouldEmitPersonality) + return; + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - if (shouldEmitPersonality) - EmitExceptionTable(); + EmitExceptionTable(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 6fe476d02ef7..69dc454ae1d7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -13,12 +13,14 @@ #define DEBUG_TYPE "dwarfdebug" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/Constants.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Analysis/DIBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -30,8 +32,9 @@ using namespace llvm; /// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) - : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { +CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, + DwarfDebug *DW) + : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } @@ -67,12 +70,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, Die->addValue(Attribute, Form, Value); } -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); +/// addString - Add a string attribute data and value. We always emit a +/// reference to the string pool instead of immediate strings so that DIEs have +/// more predictable sizes. +void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { + MCSymbol *Symb = DD->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DD->getStringPool(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } /// addLabel - Add a Dwarf label attribute data and value. @@ -98,7 +108,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, Die->addValue(Attribute, Form, createDIEEntry(Entry)); } - /// addBlock - Add block data. 
/// void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, @@ -135,8 +144,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), - G.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -148,14 +156,14 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. if (!SP.Verify()) return; + // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) + unsigned Line = SP.getLineNumber(); + if (Line == 0) return; - unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) - return; - unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), + SP.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -169,9 +177,28 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) + if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), + Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), + File.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -458,7 +485,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) { /// addConstantValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { - assert (MO.isImm() && "Invalid machine operand!"); + assert(MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; bool SignedConstant = isTypeSigned(Ty, &SizeInBits); @@ -558,8 +585,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { Buffer.addChild(getOrCreateTemplateValueParameterDIE( DITemplateValueParameter(Element))); } - } + /// addToContextOwner - Add Die into the list of its context owner's children. void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -598,13 +625,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { assert(Ty.isDerivedType() && "Unknown kind of DIType"); constructTypeDIE(*TyDIE, DIDerivedType(Ty)); } - + // If this is a named finished type then include it in the list of types + // for the accelerator tables. 
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) { + bool IsImplementation = 0; + if (Ty.isCompositeType()) { + DICompositeType CT(Ty); + // A runtime language of 0 actually means C/C++ and that any + // non-negative value is some version of Objective-C/C++. + IsImplementation = (CT.getRunTimeLang() == 0) || + CT.isObjcClassComplete(); + } + unsigned Flags = IsImplementation ? + DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); + } + addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty) { +void CompileUnit::addType(DIE *Entity, DIType Ty, + unsigned Attribute) { if (!Ty.Verify()) return; @@ -612,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); return; } @@ -622,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); // If this is a complete composite type then include it in the // list of global types. @@ -662,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { Buffer.setTag(dwarf::DW_TAG_unspecified_type); @@ -671,8 +714,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { } Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -696,10 +739,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) - if (Size) + if (Size && Tag != dwarf::DW_TAG_pointer_type) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); // Add source line info if available and TyDesc is not a forward declaration. @@ -755,8 +798,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Buffer.addChild(Arg); } } - // Add prototype flag. - if (isPrototyped) + // Add prototype flag if we're dealing with a C language and the + // function has been prototyped. 
+ if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); } break; @@ -779,13 +826,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DISubprogram SP(Element); ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); @@ -793,15 +840,54 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); + addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else + } else if (Element.isDerivedType()) { + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else + ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (Element.isObjCProperty()) { + DIObjCProperty Property(Element); + ElemDie = new DIE(Property.getTag()); + StringRef PropertyName = Property.getObjCPropertyName(); + addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); + StringRef GetterName = Property.getObjCPropertyGetterName(); + if (!GetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); + StringRef SetterName = Property.getObjCPropertySetterName(); + if (!SetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); + unsigned PropertyAttributes = 0; + if (Property.isReadOnlyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; + if (Property.isReadWriteObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; + if (Property.isAssignObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; + if (Property.isRetainObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; + if (Property.isCopyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; + if (Property.isNonAtomicObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; + if (PropertyAttributes) + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, + PropertyAttributes); + + DIEEntry *Entry = getDIEEntry(Element); + if (!Entry) { + Entry = createDIEEntry(ElemDie); + insertDIEEntry(Element, Entry); + } + } else continue; Buffer.addChild(ElemDie); } @@ -809,11 +895,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, 
DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, @@ -827,7 +908,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, dwarf::DW_FORM_flag, 1); - if (Tag == dwarf::DW_TAG_class_type) + // Add template parameters to a class, structure or union types. + // FIXME: The support isn't in the metadata for this yet. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -838,11 +923,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -857,6 +942,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add source line info if available. if (!CTy.isForwardDecl()) addSourceLine(&Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); } } @@ -870,7 +961,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); return ParamDIE; } @@ -885,7 +976,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); addType(ParamDIE, TPV.getType()); if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, TPV.getValue()); return ParamDIE; @@ -898,8 +989,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); insertDIE(NS, NDie); - if (!NS.getName().empty()) - addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); + if (!NS.getName().empty()) { + addString(NDie, dwarf::DW_AT_name, NS.getName()); + addAccelNamespace(NS.getName(), NDie); + } else + addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); addToContextOwner(NDie, NS.getContext()); return NDie; @@ -921,6 +1015,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPDie) return SPDie; + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIE *DeclDie = NULL; + if (SPDecl.isSubprogram()) { + DeclDie = getOrCreateSubprogramDIE(SPDecl); + } + SPDie = new DIE(dwarf::DW_TAG_subprogram); 
// DW_TAG_inlined_subroutine may refer to this DIE. @@ -932,25 +1032,36 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); + // Unfortunately this code needs to stay here to work around + // a bug in older gdbs that requires the linkage name to resolve + // multiple template functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. - if (SP.getFunctionDeclaration().isSubprogram()) + if (DeclDie) { + // Refer function declaration directly. + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + DeclDie); + return SPDie; + } // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - SP.getName()); + addString(SPDie, dwarf::DW_AT_name, SP.getName()); addSourceLine(SPDie, SP); - if (SP.isPrototyped()) + // Add the prototype if we have a prototype and we have a C like + // language. + if (SP.isPrototyped() && + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. @@ -965,7 +1076,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); @@ -1052,31 +1163,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { insertDIE(N, VariableDIE); // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); bool isGlobalVariable = GV.getGlobal() != NULL; if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); // Add type. DIType GTy = GV.getType(); addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // Expose as global. - addGlobal(GV.getName(), VariableDIE); - } + // Add line number info. addSourceLine(VariableDIE, GV); // Add to context owner. DIDescriptor GVContext = GV.getContext(); addToContextOwner(VariableDIE, GVContext); // Add location. 
+ bool addToAccelTable = false; + DIE *VariableSpecDIE = NULL; if (isGlobalVariable) { + addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Block, 0, dwarf::DW_FORM_udata, @@ -1086,7 +1196,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. - DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); @@ -1095,11 +1205,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); - } + } } else if (const ConstantInt *CI = dyn_cast_or_null(GV.getConstant())) addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); @@ -1114,6 +1225,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + if (addToAccelTable) { + DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE; + addAccelName(GV.getName(), AddrDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) + addAccelName(GV.getLinkageName(), AddrDIE); + } + return; } @@ -1121,8 +1242,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); + uint64_t L = SR.getLo(); + uint64_t H = SR.getHi(); // The L value defines the lower bounds which is typically zero for C/C++. The // H value is the upper bounds. Values are 64 bit. 
H - L + 1 is the size @@ -1135,8 +1256,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) return; } if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } @@ -1175,7 +1296,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(Enumerator, dwarf::DW_AT_name, Name); int64_t Value = ETy.getEnumValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); return Enumerator; @@ -1212,8 +1333,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, - dwarf::DW_FORM_string, Name); + addString(VariableDie, dwarf::DW_AT_name, Name); addSourceLine(VariableDie, DV->getVariable()); addType(VariableDie, DV->getType()); } @@ -1308,7 +1428,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(MemberDie, dwarf::DW_AT_name, Name); addType(MemberDie, DT.getTypeDerivedFrom()); @@ -1366,32 +1486,35 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. else - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. + if (MDNode *PNode = DT.getObjCProperty()) + if (DIEEntry *PropertyDie = getDIEEntry(PNode)) + MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, + PropertyDie); + + // This is only for backward compatibility. 
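The accessibility and virtuality attributes in the member-DIE hunk switch from DW_FORM_flag to DW_FORM_data1 because they carry small enumerated constants, not a presence bit. A minimal sketch of the distinction, with the DWARF accessibility codes spelled out locally for illustration:

    #include <cstdint>
    #include <vector>

    // DWARF accessibility codes.
    enum : uint8_t {
      DW_ACCESS_public    = 0x01,
      DW_ACCESS_protected = 0x02,
      DW_ACCESS_private   = 0x03
    };

    // DW_FORM_data1 stores an arbitrary one-byte constant, so all three
    // accessibility values round-trip; DW_FORM_flag can only express 0/1.
    static void appendAccessibility(std::vector<uint8_t> &DieBytes,
                                    uint8_t Access) {
      DieBytes.push_back(Access); // emitted as DW_FORM_data1
    }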
StringRef PropertyName = DT.getObjCPropertyName(); if (!PropertyName.empty()) { - addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string, - PropertyName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName); StringRef GetterName = DT.getObjCPropertyGetterName(); if (!GetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, - dwarf::DW_FORM_string, GetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName); StringRef SetterName = DT.getObjCPropertySetterName(); if (!SetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, - dwarf::DW_FORM_string, SetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName); unsigned PropertyAttributes = 0; if (DT.isReadOnlyObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 785926579fa4..45e407e27ffa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -29,13 +29,17 @@ class ConstantInt; class DbgVariable; //===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate +/// CompileUnit - This dwarf writer support class manages information associated /// with a source file. class CompileUnit { /// ID - File identifier for source. /// unsigned ID; + /// Language - The DW_AT_language of the compile unit + /// + unsigned Language; + /// Die - Compile unit debug information entry. /// const OwningPtr CUDie; @@ -56,14 +60,17 @@ class CompileUnit { /// descriptors to debug information entries using a DIEEntry proxy. DenseMap MDNodeToDIEEntryMap; - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap Globals; - /// GlobalTypes - A map of globally visible types for this unit. /// StringMap GlobalTypes; + /// AccelNames - A map of names for the name accelerator table. + /// + StringMap > AccelNames; + StringMap > AccelObjC; + StringMap > AccelNamespace; + StringMap > > AccelTypes; + /// DIEBlocks - A list of all the DIEBlocks in use. std::vector DIEBlocks; @@ -73,27 +80,56 @@ class CompileUnit { DenseMap ContainingTypeMap; public: - CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW); + CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW); ~CompileUnit(); // Accessors. unsigned getID() const { return ID; } + unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } - const StringMap &getGlobals() const { return Globals; } const StringMap &getGlobalTypes() const { return GlobalTypes; } + const StringMap > &getAccelNames() const { + return AccelNames; + } + const StringMap > &getAccelObjC() const { + return AccelObjC; + } + const StringMap > &getAccelNamespace() const { + return AccelNamespace; + } + const StringMap > > + &getAccelTypes() const { + return AccelTypes; + } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - /// addGlobalType - Add a new global type to the compile unit. 
/// void addGlobalType(DIType Ty); + + /// addAccelName - Add a new name to the name accelerator table. + void addAccelName(StringRef Name, DIE *Die) { + std::vector &DIEs = AccelNames[Name]; + DIEs.push_back(Die); + } + void addAccelObjC(StringRef Name, DIE *Die) { + std::vector &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); + } + void addAccelNamespace(StringRef Name, DIE *Die) { + std::vector &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); + } + void addAccelType(StringRef Name, std::pair Die) { + std::vector > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); + } + /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } @@ -150,8 +186,7 @@ class CompileUnit { /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); + void addString(DIE *Die, unsigned Attribute, const StringRef Str); /// addLabel - Add a Dwarf label attribute data and value. /// @@ -178,6 +213,7 @@ class CompileUnit { void addSourceLine(DIE *Die, DISubprogram SP); void addSourceLine(DIE *Die, DIType Ty); void addSourceLine(DIE *Die, DINameSpace NS); + void addSourceLine(DIE *Die, DIObjCProperty Ty); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -225,8 +261,10 @@ class CompileUnit { /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); + /// addType - Add a new type attribute to the specified entity. This takes + /// and attribute parameter because DW_AT_friend attributes are also + /// type references. + void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1b7e370fca09..cb7887890cda 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -52,6 +54,10 @@ static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); +static cl::opt DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::init(false)); + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -128,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + + // Turn on accelerator tables for Darwin. 
+ if (Triple(M->getTargetTriple()).isOSDarwin()) + DwarfAccelTables = true; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -136,6 +147,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebug::~DwarfDebug() { } +/// EmitSectionSym - Switch to the specified MCSection and emit an assembler +/// temporary label to it if SymbolStem is specified. +static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, + const char *SymbolStem = 0) { + Asm->OutStreamer.SwitchSection(Section); + if (!SymbolStem) return 0; + + MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); + Asm->OutStreamer.EmitLabel(TmpSym); + return TmpSym; +} + +MCSymbol *DwarfDebug::getStringPool() { + return Asm->GetTempSymbol("section_str"); +} + MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { std::pair &Entry = StringPool[Str]; if (Entry.first) return Entry.first; @@ -144,7 +171,6 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { return Entry.first = Asm->GetTempSymbol("string", Entry.second); } - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { @@ -178,6 +204,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } +static bool isObjCClass(StringRef Name) { + return Name.startswith("+") || Name.startswith("-"); +} + +static bool hasObjCCategory(StringRef Name) { + if (!isObjCClass(Name)) return false; + + size_t pos = Name.find(')'); + if (pos != std::string::npos) { + if (Name[pos+1] != ' ') return false; + return true; + } + return false; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); + return; +} + +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + +// Add the various names to the Dwarf accelerator table names. +static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, + DIE* Die) { + if (!SP.isDefinition()) return; + + TheCU->addAccelName(SP.getName(), Die); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) + TheCU->addAccelName(SP.getLinkageName(), Die); + + // If this is an Objective-C selector name add it to the ObjC accelerator + // too. + if (isObjCClass(SP.getName())) { + StringRef Class, Category; + getObjCClassCategory(SP.getName(), Class, Category); + TheCU->addAccelObjC(Class, Die); + if (Category != "") + TheCU->addAccelObjC(Category, Die); + // Also add the base method name to the name table. + TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + } +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -190,11 +273,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP(SPNode); DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (SPDecl.isSubprogram()) - // Refer function declaration directly. 
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPCU->getOrCreateSubprogramDIE(SPDecl)); - else { + if (!SPDecl.isSubprogram()) { // There is not any need to generate specification DIE for a function // defined at compile unit level. If a function is defined inside another // function then gdb prefers the definition at top level and but does not @@ -203,7 +282,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { - SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. DICompositeType SPTy = SP.getType(); @@ -241,6 +320,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + addSubprogramNames(SPCU, SP, SPDie); + return SPDie; } @@ -248,7 +331,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; @@ -294,10 +376,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, /// of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector &Ranges = Scope->getRanges(); - assert (Ranges.empty() == false - && "LexicalScope does not have instruction markers!"); + assert(Ranges.empty() == false && + "LexicalScope does not have instruction markers!"); if (!Scope->getScopeNode()) return NULL; @@ -314,8 +395,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - assert (0 && "Unexpected Start and End labels for a inlined scope!"); - return 0; + llvm_unreachable("Unexpected Start and End labels for a inlined scope!"); } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); @@ -358,16 +438,20 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, I = InlineInfo.find(InlinedSP); if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, - ScopeDIE)); + InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); InlinedSPNodes.push_back(InlinedSP); } else I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + GetOrCreateSourceID(DL.getFilename(), DL.getDirectory())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. 
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE); + return ScopeDIE; } @@ -376,7 +460,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; - SmallVector Children; + SmallVector Children; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) @@ -426,39 +510,39 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); + TheCU->addPubTypes(DISubprogram(DS)); - return ScopeDIE; + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. - unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. if (FileName.empty()) return GetOrCreateSourceID("", StringRef()); - // MCStream expects full path name as filename. - if (!DirName.empty() && !sys::path::is_absolute(FileName)) { - SmallString<128> FullPathName = DirName; - sys::path::append(FullPathName, FileName); - // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. - return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); - } + // TODO: this might not belong here. See if we can factor this better. + if (DirName == CompilationDir) + DirName = ""; - StringMapEntry &Entry = SourceIdMap.GetOrCreateValue(FileName); - if (Entry.getValue()) - return Entry.getValue(); + unsigned SrcId = SourceIdMap.size()+1; - unsigned SrcId = SourceIdMap.size(); - Entry.setValue(SrcId); + // We look up the file/dir pair by concatenating them with a zero byte. + SmallString<128> NamePair; + NamePair += DirName; + NamePair += '\0'; // Zero bytes are not allowed in paths. + NamePair += FileName; + + StringMapEntry &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); + if (Ent.getValue() != SrcId) + return Ent.getValue(); // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); return SrcId; } @@ -468,39 +552,36 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); - StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN, Dir); + CompilationDir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); - NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); + CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); - NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This - // simplifies debug range entries. 
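The rewritten GetOrCreateSourceID keys its map on the directory and file name joined by a NUL byte, drops the directory when it matches DW_AT_comp_dir, and hands out 1-based ids. A standalone sketch of the same bookkeeping using std::map (the real code uses StringMap and also emits the corresponding .file directive):

    #include <map>
    #include <string>

    static std::map<std::string, unsigned> SourceIds;
    static std::string CompilationDir = "/usr/src"; // assumed comp dir

    static unsigned getOrCreateSourceID(std::string Dir, std::string File) {
      if (Dir == CompilationDir)
        Dir.clear();                 // redundant with DW_AT_comp_dir
      std::string Key = Dir;
      Key.push_back('\0');           // a NUL byte cannot appear in a path
      Key += File;
      auto It = SourceIds.emplace(Key, SourceIds.size() + 1).first;
      return It->second;             // existing id, or the freshly assigned one
    }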
- NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addString(Die, dwarf::DW_AT_name, FN); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point + // into an entity. + NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, Asm->GetTempSymbol("section_line")); else NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - if (!Dir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, - Flags); + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - unsigned RVer = DIUnit.getRunTimeVersion(); - if (RVer) + if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); @@ -513,6 +594,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { /// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + CompileUnit *&CURef = SPMap[N]; + if (CURef) + return; + CURef = TheCU; + DISubprogram SP(N); if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing @@ -527,10 +613,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - // Expose as global. - TheCU->addGlobal(SP.getName(), SubprogramDie); - - SPMap[N] = TheCU; return; } @@ -676,7 +758,7 @@ void DwarfDebug::endModule() { // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); - assert (SPCU && "Unable to find Compile Unit!"); + assert(SPCU && "Unable to find Compile Unit!"); constructSubprogramDIE(SPCU, SP); DIE *ScopeDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { @@ -697,6 +779,13 @@ void DwarfDebug::endModule() { DIE *ISP = *AI; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + for (DenseMap::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. @@ -727,9 +816,14 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a debug pubnames section. - emitDebugPubNames(); - + // Emit info into a dwarf accelerator table sections. + if (DwarfAccelTables) { + emitAccelNames(); + emitAccelObjC(); + emitAccelNamespaces(); + emitAccelTypes(); + } + // Emit info into a debug pubtypes section. 
emitDebugPubTypes(); @@ -837,7 +931,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, /// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { - assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); + assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; @@ -867,8 +961,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, if (MI->getOperand(0).isCImm()) return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); - assert (0 && "Unexpected 3 operand DBG_VALUE instruction!"); - return DotDebugLocEntry(); + llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); } /// collectVariableInfo - Find variables for each lexical scope. @@ -964,7 +1057,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. - DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin)); + DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, + Begin)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -999,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = DWARF2_FLAG_IS_STMT; + unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END; PrologEndLoc = DebugLoc(); } + if (PrologEndLoc.isUnknown()) + Flags |= DWARF2_FLAG_IS_STMT; + if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); @@ -1099,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { } /// getFnDebugLoc - Walk up the scope chain of given debug loc and find -/// line number info for the function. +/// line number info for the function. static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) - return DebugLoc::get(SP.getLineNumber(), 0, SP); + if (SP.Verify()) { + // Check for number of operands since the compatibility is + // cheap here. + if (SP->getNumOperands() > 19) + return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + else + return DebugLoc::get(SP.getLineNumber(), 0, SP); + } + return DebugLoc(); } @@ -1135,7 +1239,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; if (MI->isDebugValue()) { - assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); + assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); // Keep track of user variables. 
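The new flag computation in beginInstruction withholds is_stmt until the prologue-end location has been reached: the row that carries prologue_end also gets is_stmt, and so does every row after it. A simplified sketch of that ordering, with the two DWARF line-table flags as plain bits and a line number standing in for DebugLoc:

    enum { FLAG_IS_STMT = 1, FLAG_PROLOGUE_END = 2 };

    static int PrologEndLine = 4;   // 0 means "already seen" (unknown)

    static unsigned lineFlagsFor(int Line) {
      unsigned Flags = 0;
      if (Line == PrologEndLine) {
        Flags |= FLAG_PROLOGUE_END;
        PrologEndLine = 0;          // from here on, rows are statements
      }
      if (PrologEndLine == 0)
        Flags |= FLAG_IS_STMT;
      return Flags;
    }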
const MDNode *Var = @@ -1206,7 +1310,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (const unsigned *AI = TRI->getOverlaps(MOI->getReg()); + for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg()); unsigned Reg = *AI; ++AI) { const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1277,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + 0); } } @@ -1303,7 +1407,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); - assert (TheCU && "Unable to find compile unit!"); + assert(TheCU && "Unable to find compile unit!"); // Construct abstract scopes. ArrayRef AList = LScopes.getAbstractScopesList(); @@ -1327,7 +1431,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); - if (!DisableFramePointerElim(*MF)) + if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, dwarf::DW_FORM_flag, 1); @@ -1380,7 +1484,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, Fn = DB.getFilename(); Dir = DB.getDirectory(); } else - assert(0 && "Unexpected scope info"); + llvm_unreachable("Unexpected scope info"); Src = GetOrCreateSourceID(Fn, Dir); } @@ -1398,10 +1502,6 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { // Get the children. const std::vector &Children = Die->getChildren(); - // If not last sibling and has children then add sibling offset attribute. - if (!Last && !Children.empty()) - Die->addSiblingOffset(DIEValueAllocator); - // Record the abbreviation. assignAbbrevNumber(Die->getAbbrev()); @@ -1454,18 +1554,6 @@ void DwarfDebug::computeSizeAndOffsets() { } } -/// EmitSectionSym - Switch to the specified MCSection and emit an assembler -/// temporary label to it if SymbolStem is specified. -static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = 0) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - /// EmitSectionLabels - Emit initial Dwarf sections with a label at /// the start of each one. void DwarfDebug::EmitSectionLabels() { @@ -1483,7 +1571,6 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); - EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); @@ -1525,9 +1612,6 @@ void DwarfDebug::emitDIE(DIE *Die) { Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_sibling: - Asm->EmitInt32(Die->getSiblingOffset()); - break; case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast(Values[i]); DIE *Origin = E->getEntry(); @@ -1539,7 +1623,7 @@ void DwarfDebug::emitDIE(DIE *Die) { // DW_AT_range Value encodes offset in debug_range section. 
DIEInteger *V = cast(Values[i]); - if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) { + if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) { Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, V->getValue(), 4); @@ -1678,62 +1762,133 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. -/// -void DwarfDebug::emitDebugPubNames() { +/// emitAccelNames - Emit visible names into a hashed accelerator table +/// section. +void DwarfDebug::emitAccelNames() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); - - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubnames_end", TheCU->getID()), - Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", - TheCU->getID())); - - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), - Asm->GetTempSymbol("info_begin", TheCU->getID()), - 4); - - const StringMap &Globals = TheCU->getGlobals(); - for (StringMap::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const StringMap > &Names = TheCU->getAccelNames(); + for (StringMap >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + const std::vector &Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - TheCU->getID())); } + + AT.FinalizeTable(Asm, "Names"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelNamesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} + +/// emitAccelObjC - Emit objective C classes and categories into a hashed +/// accelerator table section. 
+void DwarfDebug::emitAccelObjC() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap > &Names = TheCU->getAccelObjC(); + for (StringMap >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector &Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } + + AT.FinalizeTable(Asm, "ObjC"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelObjCSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} + +/// emitAccelNamespace - Emit namespace dies into a hashed accelerator +/// table. +void DwarfDebug::emitAccelNamespaces() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap > &Names = TheCU->getAccelNamespace(); + for (StringMap >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector &Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } + + AT.FinalizeTable(Asm, "namespac"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelNamespaceSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} + +/// emitAccelTypes() - Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + std::vector Atoms; + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + dwarf::DW_FORM_data2)); + Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + dwarf::DW_FORM_data1)); + DwarfAccelTable AT(Atoms); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap > > &Names + = TheCU->getAccelTypes(); + for (StringMap > >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const std::vector > &Entities = GI->second; + for (std::vector >::const_iterator DI + = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) + AT.AddName(Name, (*DI).first, (*DI).second); + } + } + + AT.FinalizeTable(Asm, "types"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelTypesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); } void DwarfDebug::emitDebugPubTypes() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. + // Start the dwarf pubtypes section. 
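Each of these emitters feeds name/DIE pairs into a DwarfAccelTable, which buckets the strings by hash so a debugger can find a name without scanning every DIE. As far as I recall, these Apple-style accelerator tables use the classic Bernstein string hash; a standalone sketch of that hash, assuming bucket selection is simply hash modulo bucket count:

    #include <cstdint>
    #include <string_view>

    // Bernstein ("times 33") string hash over the raw name bytes.
    static uint32_t accelHash(std::string_view Str) {
      uint32_t Hash = 5381;
      for (unsigned char C : Str)
        Hash = Hash * 33 + C;
      return Hash;
    }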
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfPubTypesSection()); Asm->OutStreamer.AddComment("Length of Public Types Info"); @@ -1766,6 +1921,7 @@ void DwarfDebug::emitDebugPubTypes() { Asm->EmitInt32(Entity->getOffset()); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); } @@ -1801,8 +1957,10 @@ void DwarfDebug::emitDebugStr() { // Emit a label for reference from debug information entries. Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); - // Emit the string itself. - Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); + // Emit the string itself with a terminating null byte. + Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), + Entries[i].second->getKeyLength()+1), + 0/*addrspace*/); } } @@ -1958,7 +2116,7 @@ void DwarfDebug::emitDebugMacInfo() { /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUsesInlineInfoSection()) + if (!Asm->MAI->doesDwarfUseInlineInfoSection()) return; if (!FirstCU) @@ -1990,10 +2148,9 @@ void DwarfDebug::emitDebugInlineInfo() { StringRef Name = SP.getName(); Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) { - Asm->OutStreamer.EmitBytes(Name, 0); - Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator. - } else + if (LName.empty()) + Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym); + else Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)), DwarfStrSectionSym); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 35653be5c897..83f30f5b446f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -30,7 +30,8 @@ namespace llvm { class CompileUnit; -class DbgConcreteScope; +class ConstantInt; +class ConstantFP; class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; @@ -207,8 +208,8 @@ class DwarfDebug { /// std::vector Abbreviations; - /// SourceIdMap - Source id map, i.e. pair of directory id and source file - /// id mapped to a unique id. + /// SourceIdMap - Source id map, i.e. pair of source filename and directory, + /// separated by a zero byte, mapped to a unique id. StringMap SourceIdMap; /// StringPool - A String->Symbol mapping of strings used by indirect @@ -216,8 +217,6 @@ class DwarfDebug { StringMap > StringPool; unsigned NextStringPoolNumber; - MCSymbol *getStringPoolEntry(StringRef Str); - /// SectionMap - Provides a unique id per text section. /// UniqueVector SectionMap; @@ -239,12 +238,12 @@ class DwarfDebug { /// DotDebugLocEntries - Collection of DotDebugLocEntry. SmallVector DotDebugLocEntries; - /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked + /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked /// (at the end of the module) as DW_AT_inline. SmallPtrSet InlinedSubprogramDIEs; /// InlineInfo - Keep track of inlined functions and their location. This - /// information is used to populate debug_inlined section. + /// information is used to populate the debug_inlined section. 
typedef std::pair InlineInfoLabels; DenseMap > InlineInfo; SmallVector InlinedSPNodes; @@ -304,6 +303,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + // As an optimization, there is no need to emit an entry in the directory + // table for the same directory as DW_at_comp_dir. + StringRef CompilationDir; + private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -340,7 +343,7 @@ class DwarfDebug { /// the start of each one. void EmitSectionLabels(); - /// emitDIE - Recusively Emits a debug information entry. + /// emitDIE - Recursively Emits a debug information entry. /// void emitDIE(DIE *Die); @@ -365,10 +368,22 @@ class DwarfDebug { /// void emitEndOfLineMatrix(unsigned SectionEnd); - /// emitDebugPubNames - Emit visible names into a debug pubnames section. - /// - void emitDebugPubNames(); + /// emitAccelNames - Emit visible names into a hashed accelerator table + /// section. + void emitAccelNames(); + + /// emitAccelObjC - Emit objective C classes and categories into a hashed + /// accelerator table section. + void emitAccelObjC(); + /// emitAccelNamespace - Emit namespace dies into a hashed accelerator + /// table. + void emitAccelNamespaces(); + + /// emitAccelTypes() - Emit type dies into a hashed accelerator table. + /// + void emitAccelTypes(); + /// emitDebugPubTypes - Emit visible types into a debug pubtypes section. /// void emitDebugPubTypes(); @@ -407,10 +422,10 @@ class DwarfDebug { /// 3. an unsigned LEB128 number indicating the number of distinct inlining /// instances for the function. /// - /// The rest of the entry consists of a {die_offset, low_pc} pair for each + /// The rest of the entry consists of a {die_offset, low_pc} pair for each /// inlined instance; the die_offset points to the inlined_subroutine die in - /// the __debug_info section, and the low_pc is the starting address for the - /// inlining instance. + /// the __debug_info section, and the low_pc is the starting address for the + /// inlining instance. void emitDebugInlineInfo(); /// constructCompileUnit - Create new CompileUnit for the given @@ -426,8 +441,8 @@ class DwarfDebug { void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// identifyScopeMarkers() - Indentify instructions that are marking - /// beginning of or end of a scope. + /// identifyScopeMarkers() - Indentify instructions that are marking the + /// beginning of or ending of a scope. void identifyScopeMarkers(); /// addCurrentFnArgument - If Var is an current function argument that add @@ -472,7 +487,7 @@ class DwarfDebug { void collectInfoFromNamedMDNodes(Module *M); /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. - /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder. + /// FIXME - Remove this when DragonEgg switches to DIBuilder. bool collectLegacyDebugInfo(Module *M); /// beginModule - Emit all Dwarf sections that should come prior to the @@ -504,6 +519,13 @@ class DwarfDebug { /// createSubprogramDIE - Create new DIE using SP. DIE *createSubprogramDIE(DISubprogram SP); + + /// getStringPool - returns the entry into the start of the pool. + MCSymbol *getStringPool(); + + /// getStringPoolEntry - returns an entry into the string pool with the given + /// string text. 
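getStringPool and getStringPoolEntry expose the .debug_str interning scheme: each distinct string is assigned a numbered temporary label on first use, and emitDebugStr later walks the pool and emits every entry with its terminating NUL byte. A simplified standalone sketch of the interning side, using plain std::string labels in place of MCSymbols:

    #include <map>
    #include <string>

    static std::map<std::string, std::pair<std::string, unsigned>> StringPool;
    static unsigned NextStringPoolNumber = 0;

    // Returns the label of the pool slot holding Str, creating it on first use.
    static const std::string &getStringPoolEntry(const std::string &Str) {
      auto &Entry = StringPool[Str];
      if (!Entry.first.empty())
        return Entry.first;                         // already interned
      Entry.second = NextStringPoolNumber++;
      Entry.first = "string" + std::to_string(Entry.second);
      return Entry.first;
    }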
+ MCSymbol *getStringPoolEntry(StringRef Str); }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp index 18b726b173dc..70cc2e56b3e1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -184,7 +185,7 @@ ComputeActionsTable(const SmallVectorImpl &LandingPads, /// CallToNoUnwindFunction - Return `true' if this is a call to a function /// marked `nounwind'. Return `false' otherwise. bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { - assert(MI->getDesc().isCall() && "This should be a call instruction!"); + assert(MI->isCall() && "This should be a call instruction!"); bool MarkedNoUnwind = false; bool SawFunc = false; @@ -243,7 +244,7 @@ ComputeCallSiteTable(SmallVectorImpl &CallSites, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - if (MI->getDesc().isCall()) + if (MI->isCall()) SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); continue; } @@ -529,10 +530,8 @@ void DwarfException::EmitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. if (VerboseAsm) { - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(idx) + " <<"); - Asm->OutStreamer.AddComment(Twine(" On exception at call site ") + - llvm::utostr(idx)); + Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx)); } Asm->EmitULEB128(idx); @@ -543,8 +542,8 @@ void DwarfException::EmitExceptionTable() { if (S.Action == 0) Asm->OutStreamer.AddComment(" Action: cleanup"); else - Asm->OutStreamer.AddComment(Twine(" Action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); + Asm->OutStreamer.AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -596,8 +595,7 @@ void DwarfException::EmitExceptionTable() { // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine(">> Call Site ") + - llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" Call between ") + @@ -625,8 +623,8 @@ void DwarfException::EmitExceptionTable() { if (S.Action == 0) Asm->OutStreamer.AddComment(" On action: cleanup"); else - Asm->OutStreamer.AddComment(Twine(" On action: ") + - llvm::utostr((S.Action - 1) / 2 + 1)); + Asm->OutStreamer.AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); } Asm->EmitULEB128(S.Action); } @@ -640,8 +638,7 @@ void DwarfException::EmitExceptionTable() { if (VerboseAsm) { // Emit comments that decode the action table. - Asm->OutStreamer.AddComment(Twine(">> Action Record ") + - llvm::utostr(++Entry) + " <<"); + Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<"); } // Type Filter @@ -650,11 +647,11 @@ void DwarfException::EmitExceptionTable() { // type of the catch clauses or the types in the exception specification. 
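The comment-building changes in EmitExceptionTable fold the integer straight into the Twine instead of going through llvm::utostr, which avoids materializing a temporary std::string for every verbose-asm comment. A small sketch of the before/after, assuming only llvm/ADT/Twine.h:

    #include "llvm/ADT/Twine.h"
    #include <string>

    std::string callSiteComment(unsigned idx) {
      // Old style: utostr builds a std::string, Twine then concatenates it.
      // std::string s = (llvm::Twine(">> Call Site ") + llvm::utostr(idx)).str();

      // New style: Twine(idx) formats the integer lazily, at .str() time.
      return (">> Call Site " + llvm::Twine(idx) + " <<").str();
    }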
if (VerboseAsm) { if (Action.ValueForTypeID > 0) - Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") + - llvm::itostr(Action.ValueForTypeID)); + Asm->OutStreamer.AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); else if (Action.ValueForTypeID < 0) - Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") + - llvm::itostr(Action.ValueForTypeID)); + Asm->OutStreamer.AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); else Asm->OutStreamer.AddComment(" Cleanup"); } @@ -669,8 +666,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment(" No further actions"); } else { unsigned NextAction = Entry + (Action.NextAction + 1) / 2; - Asm->OutStreamer.AddComment(Twine(" Continue to action ") + - llvm::utostr(NextAction)); + Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction)); } } Asm->EmitSLEB128(Action.NextAction); @@ -687,7 +683,7 @@ void DwarfException::EmitExceptionTable() { I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; if (VerboseAsm) - Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--)); + Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); if (GV) Asm->EmitReference(GV, TTypeEncoding); else @@ -707,7 +703,7 @@ void DwarfException::EmitExceptionTable() { if (VerboseAsm) { --Entry; if (TypeID != 0) - Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry)); + Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } Asm->EmitULEB128(TypeID); @@ -719,17 +715,17 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfException::BeginFunction(const MachineFunction *MF) { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfException::EndFunction() { - assert(0 && "Should be implemented"); + llvm_unreachable("Should be implemented"); } diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 75288b0934cb..ef1d2baed9ce 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size", namespace { /// BranchFolderPass - Wrap branch folder in a machine function pass. 
- class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { + class BranchFolderPass : public MachineFunctionPass { public: static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {} + explicit BranchFolderPass(): MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { - return new BranchFolderPass(DefaultEnableTailMerge); -} +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - return OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), - getAnalysisIfAvailable()); + TargetPassConfig *PassConfig = &getAnalysis(); + BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); + return Folder.OptimizeFunction(MF, + MF.getTarget().getInstrInfo(), + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); } @@ -132,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) ImpDefRegs.insert(SubReg); ++I; @@ -179,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; + RS = NULL; - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; @@ -208,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, delete RS; return MadeChange; } - + // Walk the function to find jump tables that are live. BitVector JTIsLive(JTI->getJumpTables().size()); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); @@ -432,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, for (; I != E; ++I) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall()) + if (I->isCall()) Time += 10; - else if (MCID.mayLoad() || MCID.mayStore()) + else if (I->mayLoad() || I->mayStore()) Time += 2; else ++Time; @@ -484,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { // an object with itself. #ifndef _GLIBCXX_DEBUG llvm_unreachable("Predecessor appears twice"); -#endif +#else return false; +#endif } } @@ -502,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, break; } --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; ++NumTerms; } return NumTerms; @@ -550,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // heuristics. 
unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && - !MBB1->back().getDesc().isBarrier() && - !MBB2->back().getDesc().isBarrier()) + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. @@ -870,6 +881,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Visit each predecessor only once. if (!UniquePreds.insert(PBB)) continue; + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { @@ -924,8 +938,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + // The 1 below can occur as a result of removing blocks in + // TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -980,7 +995,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { if (!MBBI->isDebugValue()) break; } - return (MBBI->getDesc().isBranch()); + return (MBBI->isBranch()); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -1008,7 +1023,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock::iterator MBB2I = --MBB2->end(); while (MBB2I->isDebugValue()) --MBB2I; - return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); + return MBB2I->isCall() && !MBB1I->isCall(); +} + +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless its absent, in which case use the +/// DebugLoc of the second if present. +static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); } /// OptimizeBlock - Analyze and optimize control flow related to the specified @@ -1016,7 +1047,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); - DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1065,6 +1095,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) @@ -1091,7 +1122,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); --PrevBBIter; MachineBasicBlock::iterator MBBIter = MBB->begin(); - // Check if DBG_VALUE at the end of PrevBB is identical to the + // Check if DBG_VALUE at the end of PrevBB is identical to the // DBG_VALUE at the beginning of MBB. 
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { @@ -1103,7 +1134,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } } PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; + PrevBB.removeSuccessor(PrevBB.succ_begin()); assert(PrevBB.succ_empty()); PrevBB.transferSuccessors(MBB); MadeChange = true; @@ -1122,6 +1153,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; @@ -1135,6 +1167,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (PriorTBB == MBB) { SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; @@ -1172,6 +1205,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); @@ -1201,6 +1235,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; @@ -1214,6 +1249,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (CurTBB && CurCond.empty() && CurFBB == 0 && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. @@ -1256,8 +1292,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. 
@@ -1281,9 +1318,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1343,7 +1381,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1446,7 +1484,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1469,6 +1507,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, bool IsDef = false; for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { const MachineOperand &MO = PI->getOperand(i); + // If PI has a regmask operand, it is probably a call. Separate away. + if (MO.isRegMask()) + return Loc; if (!MO.isReg() || MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -1505,16 +1546,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, continue; if (MO.isUse()) { Uses.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.insert(*AS); } else { if (Uses.count(Reg)) { Uses.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Uses.erase(*SR); // Use getSubRegisters to be conservative } Defs.insert(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) Defs.insert(*AS); } } @@ -1581,6 +1622,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { bool IsSafe = true; for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { MachineOperand &MO = TIB->getOperand(i); + // Don't attempt to hoist instructions with register masks. + if (MO.isRegMask()) { + IsSafe = false; + break; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1615,6 +1661,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } + + if (MO.isKill() && Uses.count(Reg)) + // Kills a register that's read by the instruction at the point of + // insertion. Remove the kill marker. 
+ MO.setIsKill(false); } } if (!IsSafe) @@ -1632,7 +1683,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) LocalDefsSet.erase(*OR); } @@ -1645,11 +1696,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR) + for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) LocalDefsSet.insert(*OR); } - HasDups = true;; + HasDups = true; ++TIB; ++FIB; } diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 14eb0541dc8d..2b7dfdbe41a0 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -58,7 +58,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. void CCState::MarkAllocated(unsigned Reg) { - for (const unsigned *Alias = TRI.getOverlaps(Reg); + for (const uint16_t *Alias = TRI.getOverlaps(Reg); unsigned Reg = *Alias; ++Alias) UsedRegs[Reg/32] |= 1 << (Reg&31); } diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 424535ba2a1c..a81bb5cc5566 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -19,36 +19,49 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); + initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeExpandPostRAPass(Registry); + initializeExpandISelPseudosPass(Registry); + initializeFinalizeMachineBundlesPass(Registry); + initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); + initializeLocalStackSlotPassPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCopyPropagationPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineLICMPass(Registry); initializeMachineLoopInfoPass(Registry); initializeMachineModuleInfoPass(Registry); + initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); + initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); - initializeRALinScanPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); - initializeLoopSplitterPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); + initializeTailDuplicatePassPass(Registry); + initializeTargetPassConfigPass(Registry); 
initializeTwoAddressInstructionPassPass(Registry); + initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp index 270c337ef67e..c13c05e26a20 100644 --- a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp +++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp @@ -39,9 +39,6 @@ namespace { CodePlacementOpt() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { - return "Code Placement Optimizer"; - } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -69,9 +66,9 @@ namespace { char CodePlacementOpt::ID = 0; } // end anonymous namespace -FunctionPass *llvm::createCodePlacementOptPass() { - return new CodePlacementOpt(); -} +char &llvm::CodePlacementOptID = CodePlacementOpt::ID; +INITIALIZE_PASS(CodePlacementOpt, "code-placement", + "Code Placement Optimizer", false, false) /// HasFallthrough - Test whether the given branch has a fallthrough, either as /// a plain fallthrough or as a fallthrough case of a conditional branch. diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 84c4d59c0e41..bad50103b9c3 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast(0)), KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0) {} + DefIndices(TRI->getNumRegs(), 0), + KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { } // Clear "do not change" set. - KeepRegs.clear(); + KeepRegs.reset(); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); // Determine the live-out physregs for this block. if (IsReturnBlock) { @@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. 
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -102,18 +103,18 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void CriticalAntiDepBreaker::FinishBlock() { RegRefs.clear(); - KeepRegs.clear(); + KeepRegs.reset(); } void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -193,8 +194,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register operands for this instruction and update @@ -217,7 +218,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { Classes[Reg] = reinterpret_cast(-1); // Now check for aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { // If an alias of the reg is used during the live range, give up. // Note that this allows us to skip checking if AntiDepReg // overlaps with any of the aliases, among other things. @@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { RegRefs.insert(std::make_pair(Reg, &MO)); if (MO.isUse() && Special) { - if (KeepRegs.insert(Reg)) { - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + if (!KeepRegs.test(Reg)) { + KeepRegs.set(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) - KeepRegs.insert(*Subreg); + KeepRegs.set(*Subreg); } } } @@ -253,6 +255,17 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // address updates. 
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + + if (MO.isRegMask()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (MO.clobbersPhysReg(i)) { + DefIndices[i] = Count; + KillIndices[i] = ~0u; + KeepRegs.reset(i); + Classes[i] = 0; + RegRefs.erase(i); + } + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -265,21 +278,21 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); + KeepRegs.reset(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); + KeepRegs.reset(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } // Conservatively mark super-registers as unusable. - for (const unsigned *Super = TRI->getSuperRegisters(Reg); + for (const uint16_t *Super = TRI->getSuperRegisters(Reg); *Super; ++Super) { unsigned SuperReg = *Super; Classes[SuperReg] = reinterpret_cast(-1); @@ -315,7 +328,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, "Kill and Def maps aren't consistent for Reg!"); } // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; @@ -355,6 +368,9 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &CheckOper = MI->getOperand(i); + if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) + return true; + if (!CheckOper.isReg() || !CheckOper.isDef() || CheckOper.getReg() != NewReg) continue; @@ -427,6 +443,8 @@ BreakAntiDependencies(const std::vector& SUnits, // Keep a map of the MachineInstr*'s back to the SUnit representing them. // This is used for updating debug information. + // + // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap DenseMap MISUnitMap; // Find the node at the bottom of the critical path. @@ -535,7 +553,7 @@ BreakAntiDependencies(const std::vector& SUnits, if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; - else if (KeepRegs.count(AntiDepReg)) + else if (KeepRegs.test(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; @@ -572,7 +590,7 @@ BreakAntiDependencies(const std::vector& SUnits, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. 
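Note on the CriticalAntiDepBreaker hunks above: KeepRegs becomes a BitVector sized to the number of physical registers, and ScanInstruction now understands register-mask operands (as produced by calls), treating every register clobbered by the mask as freshly defined and no longer "kept". A minimal, illustrative sketch of that clobber scan follows; it assumes the same DefIndices/KillIndices/KeepRegs containers as the class, the helper name applyRegMaskClobbers is invented for the example, and the Classes/RegRefs bookkeeping from the real patch is omitted:

    // Sketch: fold a register-mask operand into the anti-dependence state,
    // mirroring the ScanInstruction change above.
    static void applyRegMaskClobbers(const MachineOperand &MO, unsigned Count,
                                     const TargetRegisterInfo *TRI,
                                     std::vector<unsigned> &DefIndices,
                                     std::vector<unsigned> &KillIndices,
                                     BitVector &KeepRegs) {
      assert(MO.isRegMask() && "expected a register-mask operand");
      for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg != E; ++Reg) {
        if (!MO.clobbersPhysReg(Reg))
          continue;                // preserved by the mask, leave it alone
        DefIndices[Reg] = Count;   // the mask acts as a def at this position
        KillIndices[Reg] = ~0u;    // no pending kill past this point
        KeepRegs.reset(Reg);       // the register is no longer pinned
      }
    }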
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index 07107802972d..77462593896e 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include namespace llvm { @@ -66,7 +65,7 @@ class TargetRegisterInfo; /// KeepRegs - A set of registers which are live and cannot be changed to /// break anti-dependencies. - SmallSet KeepRegs; + BitVector KeepRegs; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp new file mode 100644 index 000000000000..bfbe7790998f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -0,0 +1,223 @@ +//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This class implements a deterministic finite automaton (DFA) based +// packetizing mechanism for VLIW architectures. It provides APIs to +// determine whether there exists a legal mapping of instructions to +// functional unit assignments in a packet. The DFA is auto-generated from +// the target's Schedule.td file. +// +// A DFA consists of 3 major elements: states, inputs, and transitions. For +// the packetizing mechanism, the input is the set of instruction classes for +// a target. The state models all possible combinations of functional unit +// consumption for a given set of instructions in a packet. A transition +// models the addition of an instruction to a packet. In the DFA constructed +// by this class, if an instruction can be added to a packet, then a valid +// transition exists from the corresponding state. Invalid transitions +// indicate that the instruction cannot be added to the current packet. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +using namespace llvm; + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + const unsigned *SET): + InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), + DFAStateEntryTable(SET) {} + + +// +// ReadTable - Read the DFA transition table and update CachedTable. +// +// Format of the transition tables: +// DFAStateInputTable[][2] = pairs of for all valid +// transitions +// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable +// for the ith state +// +void DFAPacketizer::ReadTable(unsigned int state) { + unsigned ThisState = DFAStateEntryTable[state]; + unsigned NextStateInTable = DFAStateEntryTable[state+1]; + // Early exit in case CachedTable has already contains this + // state's transitions. 
+ if (CachedTable.count(UnsignPair(state, + DFAStateInputTable[ThisState][0]))) + return; + + for (unsigned i = ThisState; i < NextStateInTable; i++) + CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = + DFAStateInputTable[i][1]; +} + + +// canReserveResources - Check if the resources occupied by a MCInstrDesc +// are available in the current state. +bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + return (CachedTable.count(StateTrans) != 0); +} + + +// reserveResources - Reserve the resources occupied by a MCInstrDesc and +// change the current state to reflect that change. +void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + assert(CachedTable.count(StateTrans) != 0); + CurrentState = CachedTable[StateTrans]; +} + + +// canReserveResources - Check if the resources occupied by a machine +// instruction are available in the current state. +bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + return canReserveResources(&MID); +} + +// reserveResources - Reserve the resources occupied by a machine +// instruction and change the current state to reflect that change. +void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + reserveResources(&MID); +} + +namespace llvm { +// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides +// Schedule method to build the dependence graph. +class DefaultVLIWScheduler : public ScheduleDAGInstrs { +public: + DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, + MachineDominatorTree &MDT, bool IsPostRA); + // Schedule - Actual scheduling work. + void schedule(); +}; +} + +DefaultVLIWScheduler::DefaultVLIWScheduler( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + bool IsPostRA) : + ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { +} + +void DefaultVLIWScheduler::schedule() { + // Build the scheduling graph. + buildSchedGraph(0); +} + +// VLIWPacketizerList Ctor +VLIWPacketizerList::VLIWPacketizerList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + bool IsPostRA) : TM(MF.getTarget()), MF(MF) { + TII = TM.getInstrInfo(); + ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); +} + +// VLIWPacketizerList Dtor +VLIWPacketizerList::~VLIWPacketizerList() { + if (VLIWScheduler) + delete VLIWScheduler; + + if (ResourceTracker) + delete ResourceTracker; +} + +// endPacket - End the current packet, bundle packet instructions and reset +// DFA state. +void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + if (CurrentPacketMIs.size() > 1) { + MachineInstr *MIFirst = CurrentPacketMIs.front(); + finalizeBundle(*MBB, MIFirst, MI); + } + CurrentPacketMIs.clear(); + ResourceTracker->clearResources(); +} + +// PacketizeMIs - Bundle machine instructions into packets. 
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, + MachineBasicBlock::iterator BeginItr, + MachineBasicBlock::iterator EndItr) { + assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->schedule(); + VLIWScheduler->exitRegion(); + + // Generate MI -> SU map. + //std::map MIToSUnit; + MIToSUnit.clear(); + for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { + SUnit *SU = &VLIWScheduler->SUnits[i]; + MIToSUnit[SU->getInstr()] = SU; + } + + // The main packetizer loop. + for (; BeginItr != EndItr; ++BeginItr) { + MachineInstr *MI = BeginItr; + + this->initPacketizerState(); + + // End the current packet if needed. + if (this->isSoloInstruction(MI)) { + endPacket(MBB, MI); + continue; + } + + // Ignore pseudo instructions. + if (this->ignorePseudoInstruction(MI, MBB)) + continue; + + SUnit *SUI = MIToSUnit[MI]; + assert(SUI && "Missing SUnit Info!"); + + // Ask DFA if machine resource is available for MI. + bool ResourceAvail = ResourceTracker->canReserveResources(MI); + if (ResourceAvail) { + // Dependency check for MI with instructions in CurrentPacketMIs. + for (std::vector::iterator VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); VI != VE; ++VI) { + MachineInstr *MJ = *VI; + SUnit *SUJ = MIToSUnit[MJ]; + assert(SUJ && "Missing SUnit Info!"); + + // Is it legal to packetize SUI and SUJ together. + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + // Allow packetization if dependency can be pruned. + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + // End the packet if dependency cannot be pruned. + endPacket(MBB, MI); + break; + } // !isLegalToPruneDependencies. + } // !isLegalToPacketizeTogether. + } // For all instructions in CurrentPacketMIs. + } else { + // End the packet if resource is not available. + endPacket(MBB, MI); + } + + // Add MI to the current packet. + BeginItr = this->addToPacket(MI); + } // For all instructions in BB. + + // End any packet left behind. + endPacket(MBB, EndItr); +} diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 6de6c0cb81bd..aa10d1d41f2b 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -28,11 +28,12 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted"); namespace { class DeadMachineInstructionElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &MF); - + const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; const TargetInstrInfo *TII; BitVector LivePhysRegs; + BitVector ReservedRegs; public: static char ID; // Pass identification, replacement for typeid @@ -45,14 +46,11 @@ namespace { }; } char DeadMachineInstructionElim::ID = 0; +char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", "Remove dead machine instructions", false, false) -FunctionPass *llvm::createDeadMachineInstructionElimPass() { - return new DeadMachineInstructionElim(); -} - bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Technically speaking inline asm without side effects and no defs can still // be deleted. 
But there is so much bad inline asm code out there, we should @@ -70,10 +68,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { - // This def has a non-debug use. Don't delete the instruction! - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Don't delete live physreg defs, or any reserved register defs. + if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg)) + return false; + } else { + if (!MRI->use_nodbg_empty(Reg)) + // This def has a non-debug use. Don't delete the instruction! + return false; } } } @@ -89,7 +91,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); // Treat reserved registers as always live. - BitVector ReservedRegs = TRI->getReservedRegs(MF); + ReservedRegs = TRI->getReservedRegs(MF); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -102,7 +104,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. - if (!MBB->empty() && MBB->back().getDesc().isReturn()) + if (!MBB->empty() && MBB->back().isReturn()) for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { unsigned Reg = *LOI; @@ -169,10 +171,13 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); *SubRegs; ++SubRegs) LivePhysRegs.reset(*SubRegs); } + } else if (MO.isRegMask()) { + // Register mask of preserved registers. All clobbers are dead. + LivePhysRegs.clearBitsNotInMask(MO.getRegMask()); } } // Record the physreg uses, after the defs, in case a physreg is @@ -183,7 +188,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.set(Reg); - for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + for (const uint16_t *AliasSet = TRI->getAliasSet(Reg); *AliasSet; ++AliasSet) LivePhysRegs.set(*AliasSet); } diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index ed9e409d3e5a..944dd4fb41c8 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -28,98 +28,34 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); -STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); -STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered"); -STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); +STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; const TargetLowering *TLI; - // The eh.exception intrinsic. - Function *ExceptionValueIntrinsic; - - // The eh.selector intrinsic. 
- Function *SelectorIntrinsic; - - // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call. - Constant *URoR; - - // The EH language-specific catch-all type. - GlobalVariable *EHCatchAllValue; - - // _Unwind_Resume or the target equivalent. + // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; - // We both use and preserve dominator info. - DominatorTree *DT; + bool InsertUnwindResumeCalls(Function &Fn); + Instruction *GetExceptionObject(ResumeInst *RI); - // The function we are running on. - Function *F; - - // The landing pads for this function. - typedef SmallPtrSet BBSet; - BBSet LandingPads; - - bool InsertUnwindResumeCalls(); - - bool NormalizeLandingPads(); - bool LowerUnwindsAndResumes(); - bool MoveExceptionValueCalls(); - - Instruction *CreateExceptionValueCall(BasicBlock *BB); - - /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the "llvm.eh.catch.all.value" call need to convert to using its - /// initializer instead. - bool CleanupSelectors(SmallPtrSet &Sels); - - bool HasCatchAllInSelector(IntrinsicInst *); - - /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet &Sels, - SmallPtrSet &CatchAllSels); - - /// FindAllURoRInvokes - Find all URoR invokes in the function. - void FindAllURoRInvokes(SmallPtrSet &URoRInvokes); - - /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or - /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to - /// a landing pad within the current function. This is a candidate to merge - /// the selector associated with the URoR invoke with the one from the - /// URoR's landing pad. - bool HandleURoRInvokes(); - - /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated - /// with the eh.exception call. This recursively looks past instructions - /// which don't change the EH pointer value, like casts or PHI nodes. - bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet &SelCalls, - SmallPtrSet &SeenPHIs); - public: static char ID; // Pass identification, replacement for typeid. DwarfEHPrepare(const TargetMachine *tm) : FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - ExceptionValueIntrinsic(0), SelectorIntrinsic(0), - URoR(0), EHCatchAllValue(0), RewindFunction(0) { + RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } virtual bool runOnFunction(Function &Fn); - // getAnalysisUsage - We need the dominator tree for handling URoR. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); - } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } const char *getPassName() const { return "Exception handling preparation"; } - }; } // end anonymous namespace @@ -129,543 +65,52 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { return new DwarfEHPrepare(tm); } -/// HasCatchAllInSelector - Return true if the intrinsic instruction has a -/// catch-all. -bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { - if (!EHCatchAllValue) return false; +/// GetExceptionObject - Return the exception object from the value passed into +/// the 'resume' instruction (typically an aggregate). Clean up any dead +/// instructions, including the 'resume' instruction. 
+Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { + Value *V = RI->getOperand(0); + Instruction *ExnObj = 0; + InsertValueInst *SelIVI = dyn_cast(V); + LoadInst *SelLoad = 0; + InsertValueInst *ExcIVI = 0; + bool EraseIVIs = false; - unsigned ArgIdx = II->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast(II->getArgOperand(ArgIdx)); - return GV == EHCatchAllValue; -} - -/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. -void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet &Sels, - SmallPtrSet &CatchAllSels) { - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *II = cast(*I); - - if (II->getParent()->getParent() != F) - continue; - - if (!HasCatchAllInSelector(II)) - Sels.insert(II); - else - CatchAllSels.insert(II); - } -} - -/// FindAllURoRInvokes - Find all URoR invokes in the function. -void DwarfEHPrepare:: -FindAllURoRInvokes(SmallPtrSet &URoRInvokes) { - for (Value::use_iterator - I = URoR->use_begin(), - E = URoR->use_end(); I != E; ++I) { - if (InvokeInst *II = dyn_cast(*I)) - URoRInvokes.insert(II); - } -} - -/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the "llvm.eh.catch.all.value" call need to convert to using its -/// initializer instead. -bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet &Sels) { - if (!EHCatchAllValue) return false; - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - bool Changed = false; - for (SmallPtrSet::iterator - I = Sels.begin(), E = Sels.end(); I != E; ++I) { - IntrinsicInst *Sel = *I; - - // Index of the "llvm.eh.catch.all.value" variable. - unsigned OpIdx = Sel->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast(Sel->getArgOperand(OpIdx)); - if (GV != EHCatchAllValue) continue; - Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); - Changed = true; - } - - return Changed; -} - -/// FindSelectorAndURoR - Find the eh.selector call associated with the -/// eh.exception call. And indicate if there is a URoR "invoke" associated with -/// the eh.exception call. This recursively looks past instructions which don't -/// change the EH pointer value, like casts or PHI nodes. -bool -DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet &SelCalls, - SmallPtrSet &SeenPHIs) { - bool Changed = false; - - for (Value::use_iterator - I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { - Instruction *II = dyn_cast(*I); - if (!II || II->getParent()->getParent() != F) continue; - - if (IntrinsicInst *Sel = dyn_cast(II)) { - if (Sel->getIntrinsicID() == Intrinsic::eh_selector) - SelCalls.insert(Sel); - } else if (InvokeInst *Invoke = dyn_cast(II)) { - if (Invoke->getCalledFunction() == URoR) - URoRInvoke = true; - } else if (CastInst *CI = dyn_cast(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); - } else if (PHINode *PN = dyn_cast(II)) { - if (SeenPHIs.insert(PN)) - // Don't process a PHI node more than once. - Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); - } - } - - return Changed; -} - -/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or -/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a -/// landing pad within the current function. 
This is a candidate to merge the -/// selector associated with the URoR invoke with the one from the URoR's -/// landing pad. -bool DwarfEHPrepare::HandleURoRInvokes() { - if (!EHCatchAllValue) { - EHCatchAllValue = - F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); - if (!EHCatchAllValue) return false; - } - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - SmallPtrSet Sels; - SmallPtrSet CatchAllSels; - FindAllCleanupSelectors(Sels, CatchAllSels); - - if (!URoR) { - URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(CatchAllSels); - } - - SmallPtrSet URoRInvokes; - FindAllURoRInvokes(URoRInvokes); - - SmallPtrSet SelsToConvert; - - for (SmallPtrSet::iterator - SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { - const BasicBlock *SelBB = (*SI)->getParent(); - for (SmallPtrSet::iterator - UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { - const BasicBlock *URoRBB = (*UI)->getParent(); - if (DT->dominates(SelBB, URoRBB)) { - SelsToConvert.insert(*SI); - break; + if (SelIVI) { + if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) { + ExcIVI = dyn_cast(SelIVI->getOperand(0)); + if (ExcIVI && isa(ExcIVI->getOperand(0)) && + ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { + ExnObj = cast(ExcIVI->getOperand(1)); + SelLoad = dyn_cast(SelIVI->getOperand(1)); + EraseIVIs = true; } } } - bool Changed = false; + if (!ExnObj) + ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI); - if (Sels.size() != SelsToConvert.size()) { - // If we haven't been able to convert all of the clean-up selectors, then - // loop through the slow way to see if they still need to be converted. - if (!ExceptionValueIntrinsic) { - ExceptionValueIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) - return CleanupSelectors(CatchAllSels); - } + RI->eraseFromParent(); - for (Value::use_iterator - I = ExceptionValueIntrinsic->use_begin(), - E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *EHPtr = dyn_cast(*I); - if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; - - bool URoRInvoke = false; - SmallPtrSet SelCalls; - SmallPtrSet SeenPHIs; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); - - if (URoRInvoke) { - // This EH pointer is being used by an invoke of an URoR instruction and - // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we - // need to convert it to a 'catch-all'. - for (SmallPtrSet::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) - if (!HasCatchAllInSelector(*SI)) - SelsToConvert.insert(*SI); - } - } + if (EraseIVIs) { + if (SelIVI->getNumUses() == 0) + SelIVI->eraseFromParent(); + if (ExcIVI->getNumUses() == 0) + ExcIVI->eraseFromParent(); + if (SelLoad && SelLoad->getNumUses() == 0) + SelLoad->eraseFromParent(); } - if (!SelsToConvert.empty()) { - // Convert all clean-up eh.selectors, which are associated with "invokes" of - // URoR calls, into catch-all eh.selectors. - Changed = true; - - for (SmallPtrSet::iterator - SI = SelsToConvert.begin(), SE = SelsToConvert.end(); - SI != SE; ++SI) { - IntrinsicInst *II = *SI; - - // Use the exception object pointer and the personality function - // from the original selector. 
- CallSite CS(II); - IntrinsicInst::op_iterator I = CS.arg_begin(); - IntrinsicInst::op_iterator E = CS.arg_end(); - IntrinsicInst::op_iterator B = prior(E); - - // Exclude last argument if it is an integer. - if (isa(B)) E = B; - - // Add exception object pointer (front). - // Add personality function (next). - // Add in any filter IDs (rest). - SmallVector Args(I, E); - - Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. - - CallInst *NewSelector = - CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II); - - NewSelector->setTailCall(II->isTailCall()); - NewSelector->setAttributes(II->getAttributes()); - NewSelector->setCallingConv(II->getCallingConv()); - - II->replaceAllUsesWith(NewSelector); - II->eraseFromParent(); - } - } - - Changed |= CleanupSelectors(CatchAllSels); - return Changed; -} - -/// NormalizeLandingPads - Normalize and discover landing pads, noting them -/// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. If we inlined -/// through an invoke we could have a normal branch from the previous -/// unwind block through to the landing pad for the original invoke. -/// Abnormal landing pads are fixed up by redirecting all unwind edges to -/// a new basic block which falls through to the original. -bool DwarfEHPrepare::NormalizeLandingPads() { - bool Changed = false; - - const MCAsmInfo *MAI = TM->getMCAsmInfo(); - bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; - - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (!isa(TI)) - continue; - BasicBlock *LPad = TI->getSuccessor(1); - // Skip landing pads that have already been normalized. - if (LandingPads.count(LPad)) - continue; - - // Check that only invoke unwind edges end at the landing pad. - bool OnlyUnwoundTo = true; - bool SwitchOK = usingSjLjEH; - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); - PI != PE; ++PI) { - TerminatorInst *PT = (*PI)->getTerminator(); - // The SjLj dispatch block uses a switch instruction. This is effectively - // an unwind edge, so we can disregard it here. There will only ever - // be one dispatch, however, so if there are multiple switches, one - // of them truly is a normal edge, not an unwind edge. - if (SwitchOK && isa(PT)) { - SwitchOK = false; - continue; - } - if (!isa(PT) || LPad == PT->getSuccessor(0)) { - OnlyUnwoundTo = false; - break; - } - } - - if (OnlyUnwoundTo) { - // Only unwind edges lead to the landing pad. Remember the landing pad. - LandingPads.insert(LPad); - continue; - } - - // At least one normal edge ends at the landing pad. Redirect the unwind - // edges to a new basic block which falls through into this one. - - // Create the new basic block. - BasicBlock *NewBB = BasicBlock::Create(F->getContext(), - LPad->getName() + "_unwind_edge"); - - // Insert it into the function right before the original landing pad. - LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); - - // Redirect unwind edges from the original landing pad to NewBB. - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { - TerminatorInst *PT = (*PI++)->getTerminator(); - if (isa(PT) && PT->getSuccessor(1) == LPad) - // Unwind to the new block. - PT->setSuccessor(1, NewBB); - } - - // If there are any PHI nodes in LPad, we need to update them so that they - // merge incoming values from NewBB instead. 
- for (BasicBlock::iterator II = LPad->begin(); isa(II); ++II) { - PHINode *PN = cast(II); - pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); - - // Check to see if all of the values coming in via unwind edges are the - // same. If so, we don't need to create a new PHI node. - Value *InVal = PN->getIncomingValueForBlock(*PB); - for (pred_iterator PI = PB; PI != PE; ++PI) { - if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { - InVal = 0; - break; - } - } - - if (InVal == 0) { - // Different unwind edges have different values. Create a new PHI node - // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), - PN->getNumIncomingValues(), - PN->getName()+".unwind", NewBB); - // Add an entry for each unwind edge, using the value from the old PHI. - for (pred_iterator PI = PB; PI != PE; ++PI) - NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); - - // Now use this new PHI as the common incoming value for NewBB in PN. - InVal = NewPN; - } - - // Revector exactly one entry in the PHI node to come from NewBB - // and delete all other entries that come from unwind edges. If - // there are both normal and unwind edges from the same predecessor, - // this leaves an entry for the normal edge. - for (pred_iterator PI = PB; PI != PE; ++PI) - PN->removeIncomingValue(*PI); - PN->addIncoming(InVal, NewBB); - } - - // Add a fallthrough from NewBB to the original landing pad. - BranchInst::Create(LPad, NewBB); - - // Now update DominatorTree analysis information. - DT->splitBlock(NewBB); - - // Remember the newly constructed landing pad. The original landing pad - // LPad is no longer a landing pad now that all unwind edges have been - // revectored to NewBB. - LandingPads.insert(NewBB); - ++NumLandingPadsSplit; - Changed = true; - } - - return Changed; -} - -/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, -/// rethrowing any previously caught exception. This will crash horribly -/// at runtime if there is no such exception: using unwind to throw a new -/// exception is currently not supported. -bool DwarfEHPrepare::LowerUnwindsAndResumes() { - SmallVector ResumeInsts; - - for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) { - for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){ - if (isa(bi)) - ResumeInsts.push_back(bi); - else if (CallInst *call = dyn_cast(bi)) - if (Function *fn = dyn_cast(call->getCalledValue())) - if (fn->getName() == "llvm.eh.resume") - ResumeInsts.push_back(bi); - } - } - - if (ResumeInsts.empty()) return false; - - // Find the rewind function if we didn't already. - if (!RewindFunction) { - LLVMContext &Ctx = ResumeInsts[0]->getContext(); - FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Type::getInt8PtrTy(Ctx), false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - - bool Changed = false; - - for (SmallVectorImpl::iterator - I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) { - Instruction *RI = *I; - - // Replace the resuming instruction with a call to _Unwind_Resume (or the - // appropriate target equivalent). - - llvm::Value *ExnValue; - if (isa(RI)) - ExnValue = CreateExceptionValueCall(RI->getParent()); - else - ExnValue = cast(RI)->getArgOperand(0); - - // Create the call... 
- CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI); - CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); - - // ...followed by an UnreachableInst, if it was an unwind. - // Calls to llvm.eh.resume are typically already followed by this. - if (isa(RI)) - new UnreachableInst(RI->getContext(), RI); - - if (isa(RI)) - ++NumUnwindsLowered; - else - ++NumResumesLowered; - - // Nuke the resume instruction. - RI->eraseFromParent(); - - Changed = true; - } - - return Changed; -} - -/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from -/// landing pads by replacing calls outside of landing pads with direct use of -/// a register holding the appropriate value; this requires adding calls inside -/// all landing pads to initialize the register. Also, move eh.exception calls -/// inside landing pads to the start of the landing pad (optional, but may make -/// things simpler for later passes). -bool DwarfEHPrepare::MoveExceptionValueCalls() { - // If the eh.exception intrinsic is not declared in the module then there is - // nothing to do. Speed up compilation by checking for this common case. - if (!ExceptionValueIntrinsic && - !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) - return false; - - bool Changed = false; - - // Move calls to eh.exception that are inside a landing pad to the start of - // the landing pad. - for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) { - BasicBlock *LP = *LI; - for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast(II++)) { - // Found a call to eh.exception. - if (!EI->use_empty()) { - // If there is already a call to eh.exception at the start of the - // landing pad, then get hold of it; otherwise create such a call. - Value *CallAtStart = CreateExceptionValueCall(LP); - - // If the call was at the start of a landing pad then leave it alone. - if (EI == CallAtStart) - continue; - EI->replaceAllUsesWith(CallAtStart); - } - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - Changed = true; - } - } - - // Look for calls to eh.exception that are not in a landing pad. If one is - // found, then a register that holds the exception value will be created in - // each landing pad, and the SSAUpdater will be used to compute the values - // returned by eh.exception calls outside of landing pads. - SSAUpdater SSA; - - // Remember where we found the eh.exception call, to avoid rescanning earlier - // basic blocks which we already know contain no eh.exception calls. - bool FoundCallOutsideLandingPad = false; - Function::iterator BB = F->begin(); - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) - if (isa(II)) { - SSA.Initialize(II->getType(), II->getName()); - FoundCallOutsideLandingPad = true; - break; - } - - if (FoundCallOutsideLandingPad) - break; - } - - // If all calls to eh.exception are in landing pads then we are done. - if (!FoundCallOutsideLandingPad) - return Changed; - - // Add a call to eh.exception at the start of each landing pad, and tell the - // SSAUpdater that this is the value produced by the landing pad. 
- for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) - SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); - - // Now turn all calls to eh.exception that are not in a landing pad into a use - // of the appropriate register. - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast(II++)) { - // Found a call to eh.exception, replace it with the value from any - // upstream landing pad(s). - EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - } - } - - return true; -} - -/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at -/// the start of the basic block (unless there already is one, in which case -/// the existing call is returned). -Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { - Instruction *Start = BB->getFirstNonPHIOrDbg(); - // Is this a call to eh.exception? - if (IntrinsicInst *CI = dyn_cast(Start)) - if (CI->getIntrinsicID() == Intrinsic::eh_exception) - // Reuse the existing call. - return Start; - - // Find the eh.exception intrinsic if we didn't already. - if (!ExceptionValueIntrinsic) - ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::eh_exception); - - // Create the call. - return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); + return ExnObj; } /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. -bool DwarfEHPrepare::InsertUnwindResumeCalls() { +bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool UsesNewEH = false; SmallVector Resumes; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast(TI)) Resumes.push_back(RI); @@ -682,27 +127,45 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. - LLVMContext &Ctx = F->getContext(); - BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F); - PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(), + LLVMContext &Ctx = Fn.getContext(); + unsigned ResumesSize = Resumes.size(); + + if (ResumesSize == 1) { + // Instead of creating a new BB and PHI node, just append the call to + // _Unwind_Resume to the end of the single resume block. + ResumeInst *RI = Resumes.front(); + BasicBlock *UnwindBB = RI->getParent(); + Instruction *ExnObj = GetExceptionObject(RI); + + // Call the _Unwind_Resume function. + CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + + // We never expect _Unwind_Resume to return. 
+ new UnreachableInst(Ctx, UnwindBB); + return true; + } + + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize, "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. - BasicBlock *UnwindBBDom = Resumes[0]->getParent(); for (SmallVectorImpl::iterator I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { ResumeInst *RI = *I; - BranchInst::Create(UnwindBB, RI->getParent()); - ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0), - 0, "exn.obj", RI); - PN->addIncoming(ExnObj, RI->getParent()); - UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom); - RI->eraseFromParent(); + BasicBlock *Parent = RI->getParent(); + BranchInst::Create(UnwindBB, Parent); + + Instruction *ExnObj = GetExceptionObject(RI); + PN->addIncoming(ExnObj, Parent); + + ++NumResumesLowered; } // Call the function. @@ -711,40 +174,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); - - // Now update DominatorTree analysis information. - DT->addNewBlock(UnwindBB, UnwindBBDom); return true; } bool DwarfEHPrepare::runOnFunction(Function &Fn) { - bool Changed = false; - - // Initialize internal state. - DT = &getAnalysis(); // FIXME: We won't need this with the new EH. - F = &Fn; - - if (InsertUnwindResumeCalls()) { - // FIXME: The reset of this function can go once the new EH is done. - LandingPads.clear(); - return true; - } - - // Ensure that only unwind edges end at landing pads (a landing pad is a - // basic block where an invoke unwind edge ends). - Changed |= NormalizeLandingPads(); - - // Turn unwind instructions and eh.resume calls into libcalls. - Changed |= LowerUnwindsAndResumes(); - - // TODO: Move eh.selector calls to landing pads and combine them. - - // Move eh.exception calls to landing pads. - Changed |= MoveExceptionValueCalls(); - - Changed |= HandleURoRInvokes(); - - LandingPads.clear(); - + bool Changed = InsertUnwindResumeCalls(Fn); return Changed; } diff --git a/contrib/llvm/lib/CodeGen/ELF.h b/contrib/llvm/lib/CodeGen/ELF.h deleted file mode 100644 index 5b634682cc87..000000000000 --- a/contrib/llvm/lib/CodeGen/ELF.h +++ /dev/null @@ -1,227 +0,0 @@ -//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This header contains common, non-processor-specific data structures and -// constants for the ELF file format. -// -// The details of the ELF32 bits in this file are largely based on the Tool -// Interface Standard (TIS) Executable and Linking Format (ELF) Specification -// Version 1.2, May 1995. 
The ELF64 is based on HP/Intel definition of the -// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998 -// -//===----------------------------------------------------------------------===// - -#ifndef CODEGEN_ELF_H -#define CODEGEN_ELF_H - -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - class GlobalValue; - - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - - // ELF symbols are related to llvm ones by being one of the two llvm - // types, for the other ones (section, file, func) a null pointer is - // assumed by default. - union { - const GlobalValue *GV; // If this is a pointer to a GV - const char *Ext; // If this is a pointer to a named symbol - } Source; - - // Describes from which source type this ELF symbol comes from, - // they can be GlobalValue, ExternalSymbol or neither. - enum { - isGV, // The Source.GV field is valid. - isExtSym, // The Source.ExtSym field is valid. - isOther // Not a GlobalValue or External Symbol - }; - unsigned SourceType; - - bool isGlobalValue() const { return SourceType == isGV; } - bool isExternalSym() const { return SourceType == isExtSym; } - - // getGlobalValue - If this is a global value which originated the - // elf symbol, return a reference to it. - const GlobalValue *getGlobalValue() const { - assert(SourceType == isGV && "This is not a global value"); - return Source.GV; - } - - // getExternalSym - If this is an external symbol which originated the - // elf symbol, return a reference to it. - const char *getExternalSymbol() const { - assert(SourceType == isExtSym && "This is not an external symbol"); - return Source.Ext; - } - - // getGV - From a global value return a elf symbol to represent it - static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, - unsigned Type, unsigned Visibility) { - ELFSym *Sym = new ELFSym(); - Sym->Source.GV = GV; - Sym->setBind(Bind); - Sym->setType(Type); - Sym->setVisibility(Visibility); - Sym->SourceType = isGV; - return Sym; - } - - // getExtSym - Create and return an elf symbol to represent an - // external symbol - static ELFSym *getExtSym(const char *Ext) { - ELFSym *Sym = new ELFSym(); - Sym->Source.Ext = Ext; - Sym->setBind(ELF::STB_GLOBAL); - Sym->setType(ELF::STT_NOTYPE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SourceType = isExtSym; - return Sym; - } - - // getSectionSym - Returns a elf symbol to represent an elf section - static ELFSym *getSectionSym() { - ELFSym *Sym = new ELFSym(); - Sym->setBind(ELF::STB_LOCAL); - Sym->setType(ELF::STT_SECTION); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SourceType = isOther; - return Sym; - } - - // getFileSym - Returns a elf symbol to represent the module identifier - static ELFSym *getFileSym() { - ELFSym *Sym = new ELFSym(); - Sym->setBind(ELF::STB_LOCAL); - Sym->setType(ELF::STT_FILE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; - Sym->SourceType = isOther; - return Sym; - } - - // getUndefGV - Returns a STT_NOTYPE symbol - static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { - ELFSym *Sym = new ELFSym(); - Sym->Source.GV = GV; - Sym->setBind(Bind); - Sym->setType(ELF::STT_NOTYPE); - Sym->setVisibility(ELF::STV_DEFAULT); - Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; - 
Sym->SourceType = isGV; - return Sym; - } - - // ELF specific fields - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - uint8_t Info; - uint8_t Other; - unsigned short SectionIdx; - - // Symbol index into the Symbol table - unsigned SymTabIdx; - - ELFSym() : SourceType(isOther), NameIdx(0), Value(0), - Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0), - SymTabIdx(0) {} - - unsigned getBind() const { return (Info >> 4) & 0xf; } - unsigned getType() const { return Info & 0xf; } - bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; } - bool isFileType() const { return getType() == ELF::STT_FILE; } - - void setBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); - } - - void setType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - - void setVisibility(unsigned V) { - assert(V == (V & 0x3) && "Visibility value out of range!"); - Other = V; - } - }; - - /// ELFSection - This struct contains information about each section that is - /// emitted to the file. This is eventually turned into the section header - /// table at the end of the file. - class ELFSection : public BinaryObject { - public: - // ELF specific fields - unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted. - unsigned Type; // sh_type - Section contents & semantics - unsigned Flags; // sh_flags - Section flags. - uint64_t Addr; // sh_addr - The mem addr this section is in. - unsigned Offset; // sh_offset - Offset from the file start - unsigned Size; // sh_size - The section size. - unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxiliary information. - unsigned Align; // sh_addralign - Alignment of section. - unsigned EntSize; // sh_entsize - Size of entries in the section e - - /// SectionIdx - The number of the section in the Section Table. - unsigned short SectionIdx; - - /// Sym - The symbol to represent this section if it has one. - ELFSym *Sym; - - /// getSymIndex - Returns the symbol table index of the symbol - /// representing this section. - unsigned getSymbolTableIndex() const { - assert(Sym && "section not present in the symbol table"); - return Sym->SymTabIdx; - } - - ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) - : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), - Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} - }; - - /// ELFRelocation - This class contains all the information necessary to - /// to generate any 32-bit or 64-bit ELF relocation entry. 
- class ELFRelocation { - uint64_t r_offset; // offset in the section of the object this applies to - uint32_t r_symidx; // symbol table index of the symbol to use - uint32_t r_type; // machine specific relocation type - int64_t r_add; // explicit relocation addend - bool r_rela; // if true then the addend is part of the entry - // otherwise the addend is at the location specified - // by r_offset - public: - uint64_t getInfo(bool is64Bit) const { - if (is64Bit) - return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL); - else - return (r_symidx << 8) + (r_type & 0xFFL); - } - - uint64_t getOffset() const { return r_offset; } - int64_t getAddend() const { return r_add; } - - ELFRelocation(uint64_t off, uint32_t sym, uint32_t type, - bool rela = true, int64_t addend = 0) : - r_offset(off), r_symidx(sym), r_type(type), - r_add(addend), r_rela(rela) {} - }; - -} // end namespace llvm - -#endif diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp deleted file mode 100644 index 660424c3c141..000000000000 --- a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.cpp +++ /dev/null @@ -1,205 +0,0 @@ -//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "elfce" - -#include "ELF.h" -#include "ELFWriter.h" -#include "ELFCodeEmitter.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -//===----------------------------------------------------------------------===// -// ELFCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - -/// startFunction - This callback is invoked when a new machine function is -/// about to be emitted. -void ELFCodeEmitter::startFunction(MachineFunction &MF) { - DEBUG(dbgs() << "processing function: " - << MF.getFunction()->getName() << "\n"); - - // Get the ELF Section that this function belongs in. - ES = &EW.getTextSection(MF.getFunction()); - - // Set the desired binary object to be used by the code emitters - setBinaryObject(ES); - - // Get the function alignment in bytes - unsigned Align = (1 << MF.getAlignment()); - - // The function must start on its required alignment - ES->emitAlignment(Align); - - // Update the section alignment if needed. - ES->Align = std::max(ES->Align, Align); - - // Record the function start offset - FnStartOff = ES->getCurrentPCOffset(); - - // Emit constant pool and jump tables to their appropriate sections. - // They need to be emitted before the function because in some targets - // the later may reference JT or CP entry address. 
- emitConstantPool(MF.getConstantPool()); - if (MF.getJumpTableInfo()) - emitJumpTables(MF.getJumpTableInfo()); -} - -/// finishFunction - This callback is invoked after the function is completely -/// finished. -bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Add a symbol to represent the function. - const Function *F = MF.getFunction(); - ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC, - EW.getGlobalELFVisibility(F)); - FnSym->SectionIdx = ES->SectionIdx; - FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; - EW.AddPendingGlobalSymbol(F, true); - - // Offset from start of Section - FnSym->Value = FnStartOff; - - if (!F->hasPrivateLinkage()) - EW.SymbolList.push_back(FnSym); - - // Patch up Jump Table Section relocations to use the real MBBs offsets - // now that the MBB label offsets inside the function are known. - if (MF.getJumpTableInfo()) { - ELFSection &JTSection = EW.getJumpTableSection(); - for (std::vector::iterator MRI = JTRelocations.begin(), - MRE = JTRelocations.end(); MRI != MRE; ++MRI) { - MachineRelocation &MR = *MRI; - uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setResultPointer((void*)MBBOffset); - MR.setConstantVal(ES->SectionIdx); - JTSection.addRelocation(MR); - } - } - - // If we have emitted any relocations to function-specific objects such as - // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses later - for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { - MachineRelocation &MR = Relocations[i]; - intptr_t Addr; - if (MR.isGlobalValue()) { - EW.AddPendingGlobalSymbol(MR.getGlobalValue()); - } else if (MR.isExternalSymbol()) { - EW.AddPendingExternalSymbol(MR.getExternalSymbol()); - } else if (MR.isBasicBlock()) { - Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setConstantVal(ES->SectionIdx); - MR.setResultPointer((void*)Addr); - } else if (MR.isConstantPoolIndex()) { - Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); - MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); - MR.setResultPointer((void*)Addr); - } else if (MR.isJumpTableIndex()) { - ELFSection &JTSection = EW.getJumpTableSection(); - Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); - MR.setConstantVal(JTSection.SectionIdx); - MR.setResultPointer((void*)Addr); - } else { - llvm_unreachable("Unhandled relocation type"); - } - ES->addRelocation(MR); - } - - // Clear per-function data structures. 
- JTRelocations.clear(); - Relocations.clear(); - CPLocations.clear(); - CPSections.clear(); - JTLocations.clear(); - MBBLocations.clear(); - return false; -} - -/// emitConstantPool - For each constant pool entry, figure out which section -/// the constant should live in and emit the constant -void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { - const std::vector &CP = MCP->getConstants(); - if (CP.empty()) return; - - // TODO: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for elf constant pools!"); - - for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; - - // Record the constant pool location and the section index - ELFSection &CstPool = EW.getConstantPoolSection(CPE); - CPLocations.push_back(CstPool.size()); - CPSections.push_back(CstPool.SectionIdx); - - if (CPE.isMachineConstantPoolEntry()) - assert(0 && "CPE.isMachineConstantPoolEntry not supported yet"); - - // Emit the constant to constant pool section - EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); - } -} - -/// emitJumpTables - Emit all the jump tables for a given jump table info -/// record to the appropriate section. -void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { - const std::vector &JT = MJTI->getJumpTables(); - if (JT.empty()) return; - - // FIXME: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for elf jump tables!"); - - const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); - unsigned EntrySize = 4; //MJTI->getEntrySize(); - - // Get the ELF Section to emit the jump table - ELFSection &JTSection = EW.getJumpTableSection(); - - // For each JT, record its offset from the start of the section - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector &MBBs = JT[i].MBBs; - - // Record JT 'i' offset in the JT section - JTLocations.push_back(JTSection.size()); - - // Each MBB entry in the Jump table section has a relocation entry - // against the current text section. - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy(); - MachineRelocation MR = - MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]); - - // Add the relocation to the Jump Table section - JTRelocations.push_back(MR); - - // Output placeholder for MBB in the JT section - for (unsigned s=0; s < EntrySize; ++s) - JTSection.emitByte(0); - } - } -} - -} // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h b/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h deleted file mode 100644 index 8671c674eecf..000000000000 --- a/contrib/llvm/lib/CodeGen/ELFCodeEmitter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef ELFCODEEMITTER_H -#define ELFCODEEMITTER_H - -#include "llvm/CodeGen/ObjectCodeEmitter.h" -#include - -namespace llvm { - class ELFWriter; - class ELFSection; - - /// ELFCodeEmitter - This class is used by the ELFWriter to - /// emit the code for functions to the ELF file. 
- class ELFCodeEmitter : public ObjectCodeEmitter { - ELFWriter &EW; - - /// Target machine description - TargetMachine &TM; - - /// Section containing code for functions - ELFSection *ES; - - /// Relocations - Record relocations needed by the current function - std::vector Relocations; - - /// JTRelocations - Record relocations needed by the relocation - /// section. - std::vector JTRelocations; - - /// FnStartPtr - Function offset from the beginning of ELFSection 'ES' - uintptr_t FnStartOff; - public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} - - /// addRelocation - Register new relocations for this function - void addRelocation(const MachineRelocation &MR) { - Relocations.push_back(MR); - } - - /// emitConstantPool - For each constant pool entry, figure out which - /// section the constant should live in and emit data to it - void emitConstantPool(MachineConstantPool *MCP); - - /// emitJumpTables - Emit all the jump tables for a given jump table - /// info and record them to the appropriate section. - void emitJumpTables(MachineJumpTableInfo *MJTI); - - void startFunction(MachineFunction &F); - bool finishFunction(MachineFunction &F); - - /// emitLabel - Emits a label - virtual void emitLabel(MCSymbol *Label) { - assert(0 && "emitLabel not implemented"); - } - - /// getLabelAddress - Return the address of the specified LabelID, - /// only usable after the LabelID has been emitted. - virtual uintptr_t getLabelAddress(MCSymbol *Label) const { - assert(0 && "getLabelAddress not implemented"); - return 0; - } - - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} - -}; // end class ELFCodeEmitter - -} // end namespace llvm - -#endif - diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp deleted file mode 100644 index f2c218565854..000000000000 --- a/contrib/llvm/lib/CodeGen/ELFWriter.cpp +++ /dev/null @@ -1,1105 +0,0 @@ -//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the target-independent ELF writer. This file writes out -// the ELF file in the following order: -// -// #1. ELF Header -// #2. '.text' section -// #3. '.data' section -// #4. '.bss' section (conceptual position in file) -// ... -// #X. '.shstrtab' section -// #Y. Section Table -// -// The entries in the section table are laid out as: -// #0. Null entry [required] -// #1. ".text" entry - the program code -// #2. ".data" entry - global variables with initializers. [ if needed ] -// #3. ".bss" entry - global variables without initializers. [ if needed ] -// ... -// #N. ".shstrtab" entry - String table for the section names. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "elfwriter" -#include "ELF.h" -#include "ELFWriter.h" -#include "ELFCodeEmitter.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -using namespace llvm; - -char ELFWriter::ID = 0; - -//===----------------------------------------------------------------------===// -// ELFWriter Implementation -//===----------------------------------------------------------------------===// - -ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) - : MachineFunctionPass(ID), O(o), TM(tm), - OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(), - &TM.getTargetLowering()->getObjFileLowering())), - TLOF(TM.getTargetLowering()->getObjFileLowering()), - is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), - isLittleEndian(TM.getTargetData()->isLittleEndian()), - ElfHdr(isLittleEndian, is64Bit) { - - MAI = TM.getMCAsmInfo(); - TEW = TM.getELFWriterInfo(); - - // Create the object code emitter object for this target. - ElfCE = new ELFCodeEmitter(*this); - - // Initial number of sections - NumSections = 0; -} - -ELFWriter::~ELFWriter() { - delete ElfCE; - delete &OutContext; - - while(!SymbolList.empty()) { - delete SymbolList.back(); - SymbolList.pop_back(); - } - - while(!PrivateSyms.empty()) { - delete PrivateSyms.back(); - PrivateSyms.pop_back(); - } - - while(!SectionList.empty()) { - delete SectionList.back(); - SectionList.pop_back(); - } - - // Release the name mangler object. - delete Mang; Mang = 0; -} - -// doInitialization - Emit the file header and all of the global variables for -// the module to the ELF file. -bool ELFWriter::doInitialization(Module &M) { - // Initialize TargetLoweringObjectFile. - const_cast(TLOF).Initialize(OutContext, TM); - - Mang = new Mangler(OutContext, *TM.getTargetData()); - - // ELF Header - // ---------- - // Fields e_shnum e_shstrndx are only known after all section have - // been emitted. They locations in the ouput buffer are recorded so - // to be patched up later. 
- // - // Note - // ---- - // emitWord method behaves differently for ELF32 and ELF64, writing - // 4 bytes in the former and 8 in the last for *_off and *_addr elf types - - ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0] - ElfHdr.emitByte('E'); // e_ident[EI_MAG1] - ElfHdr.emitByte('L'); // e_ident[EI_MAG2] - ElfHdr.emitByte('F'); // e_ident[EI_MAG3] - - ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS] - ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA] - ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION] - ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD] - - ElfHdr.emitWord16(ELF::ET_REL); // e_type - ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target - ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version - ElfHdr.emitWord(0); // e_entry, no entry point in .o file - ElfHdr.emitWord(0); // e_phoff, no program header for .o - ELFHdr_e_shoff_Offset = ElfHdr.size(); - ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes - ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants - ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size - ElfHdr.emitWord16(0); // e_phentsize = prog header entry size - ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0 - - // e_shentsize = Section header entry size - ElfHdr.emitWord16(TEW->getSHdrSize()); - - // e_shnum = # of section header ents - ELFHdr_e_shnum_Offset = ElfHdr.size(); - ElfHdr.emitWord16(0); // Placeholder - - // e_shstrndx = Section # of '.shstrtab' - ELFHdr_e_shstrndx_Offset = ElfHdr.size(); - ElfHdr.emitWord16(0); // Placeholder - - // Add the null section, which is required to be first in the file. - getNullSection(); - - // The first entry in the symtab is the null symbol and the second - // is a local symbol containing the module/file name - SymbolList.push_back(new ELFSym()); - SymbolList.push_back(ELFSym::getFileSym()); - - return false; -} - -// AddPendingGlobalSymbol - Add a global to be processed and to -// the global symbol lookup, use a zero index because the table -// index will be determined later. -void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV, - bool AddToLookup /* = false */) { - PendingGlobals.insert(GV); - if (AddToLookup) - GblSymLookup[GV] = 0; -} - -// AddPendingExternalSymbol - Add the external to be processed -// and to the external symbol lookup, use a zero index because -// the symbol table index will be determined later. 
-void ELFWriter::AddPendingExternalSymbol(const char *External) { - PendingExternals.insert(External); - ExtSymLookup[External] = 0; -} - -ELFSection &ELFWriter::getDataSection() { - const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); - return getSection(Data->getSectionName(), Data->getType(), - Data->getFlags(), 4); -} - -ELFSection &ELFWriter::getBSSSection() { - const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); - return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); -} - -// getCtorSection - Get the static constructor section -ELFSection &ELFWriter::getCtorSection() { - const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); - return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); -} - -// getDtorSection - Get the static destructor section -ELFSection &ELFWriter::getDtorSection() { - const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); - return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); -} - -// getTextSection - Get the text section for the specified function -ELFSection &ELFWriter::getTextSection(const Function *F) { - const MCSectionELF *Text = - (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); - return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); -} - -// getJumpTableSection - Get a read only section for constants when -// emitting jump tables. TODO: add PIC support -ELFSection &ELFWriter::getJumpTableSection() { - const MCSectionELF *JT = - (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); - return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), - TM.getTargetData()->getPointerABIAlignment()); -} - -// getConstantPoolSection - Get a constant pool section based on the machine -// constant pool entry type and relocation info. -ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { - SectionKind Kind; - switch (CPE.getRelocationInfo()) { - default: llvm_unreachable("Unknown section kind"); - case 2: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case 0: - switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { - case 4: Kind = SectionKind::getMergeableConst4(); break; - case 8: Kind = SectionKind::getMergeableConst8(); break; - case 16: Kind = SectionKind::getMergeableConst16(); break; - default: Kind = SectionKind::getMergeableConst(); break; - } - } - - const MCSectionELF *CPSect = - (const MCSectionELF *)TLOF.getSectionForConstant(Kind); - return getSection(CPSect->getSectionName(), CPSect->getType(), - CPSect->getFlags(), CPE.getAlignment()); -} - -// getRelocSection - Return the relocation section of section 'S'. 'RelA' -// is true if the relocation section contains entries with addends. -ELFSection &ELFWriter::getRelocSection(ELFSection &S) { - unsigned SectionType = TEW->hasRelocationAddend() ? 
- ELF::SHT_RELA : ELF::SHT_REL; - - std::string SectionName(".rel"); - if (TEW->hasRelocationAddend()) - SectionName.append("a"); - SectionName.append(S.getName()); - - return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment()); -} - -// getGlobalELFVisibility - Returns the ELF specific visibility type -unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { - default: - llvm_unreachable("unknown visibility type"); - case GlobalValue::DefaultVisibility: - return ELF::STV_DEFAULT; - case GlobalValue::HiddenVisibility: - return ELF::STV_HIDDEN; - case GlobalValue::ProtectedVisibility: - return ELF::STV_PROTECTED; - } - return 0; -} - -// getGlobalELFBinding - Returns the ELF specific binding type -unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { - if (GV->hasInternalLinkage()) - return ELF::STB_LOCAL; - - if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) - return ELF::STB_WEAK; - - return ELF::STB_GLOBAL; -} - -// getGlobalELFType - Returns the ELF specific type for a global -unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { - if (GV->isDeclaration()) - return ELF::STT_NOTYPE; - - if (isa(GV)) - return ELF::STT_FUNC; - - return ELF::STT_OBJECT; -} - -// IsELFUndefSym - True if the global value must be marked as a symbol -// which points to a SHN_UNDEF section. This means that the symbol has -// no definition on the module. -static bool IsELFUndefSym(const GlobalValue *GV) { - return GV->isDeclaration() || (isa(GV)); -} - -// AddToSymbolList - Update the symbol lookup and If the symbol is -// private add it to PrivateSyms list, otherwise to SymbolList. -void ELFWriter::AddToSymbolList(ELFSym *GblSym) { - assert(GblSym->isGlobalValue() && "Symbol must be a global value"); - - const GlobalValue *GV = GblSym->getGlobalValue(); - if (GV->hasPrivateLinkage()) { - // For a private symbols, keep track of the index inside - // the private list since it will never go to the symbol - // table and won't be patched up later. - PrivateSyms.push_back(GblSym); - GblSymLookup[GV] = PrivateSyms.size()-1; - } else { - // Non private symbol are left with zero indices until - // they are patched up during the symbol table emition - // (where the indicies are created). - SymbolList.push_back(GblSym); - GblSymLookup[GV] = 0; - } -} - -/// HasCommonSymbols - True if this section holds common symbols, this is -/// indicated on the ELF object file by a symbol with SHN_COMMON section -/// header index. -static bool HasCommonSymbols(const MCSectionELF &S) { - // FIXME: this is wrong, a common symbol can be in .data for example. - if (StringRef(S.getSectionName()).startswith(".gnu.linkonce.")) - return true; - - return false; -} - - -// EmitGlobal - Choose the right section for global and emit it -void ELFWriter::EmitGlobal(const GlobalValue *GV) { - - // Check if the referenced symbol is already emitted - if (GblSymLookup.find(GV) != GblSymLookup.end()) - return; - - // Handle ELF Bind, Visibility and Type for the current symbol - unsigned SymBind = getGlobalELFBinding(GV); - unsigned SymType = getGlobalELFType(GV); - bool IsUndefSym = IsELFUndefSym(GV); - - ELFSym *GblSym = IsUndefSym ? 
ELFSym::getUndefGV(GV, SymBind) - : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV)); - - if (!IsUndefSym) { - assert(isa(GV) && "GV not a global variable!"); - const GlobalVariable *GVar = dyn_cast(GV); - - // Handle special llvm globals - if (EmitSpecialLLVMGlobal(GVar)) - return; - - // Get the ELF section where this global belongs from TLOF - const MCSectionELF *S = - (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM); - ELFSection &ES = - getSection(S->getSectionName(), S->getType(), S->getFlags()); - SectionKind Kind = S->getKind(); - - // The symbol align should update the section alignment if needed - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPreferredAlignment(GVar); - unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); - GblSym->Size = Size; - - if (HasCommonSymbols(*S)) { // Symbol must go to a common section - GblSym->SectionIdx = ELF::SHN_COMMON; - - // A new linkonce section is created for each global in the - // common section, the default alignment is 1 and the symbol - // value contains its alignment. - ES.Align = 1; - GblSym->Value = Align; - - } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS. - GblSym->SectionIdx = ES.SectionIdx; - - // Update the size with alignment and the next object can - // start in the right offset in the section - if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1); - ES.Align = std::max(ES.Align, Align); - - // GblSym->Value should contain the virtual offset inside the section. - // Virtual because the BSS space is not allocated on ELF objects - GblSym->Value = ES.Size; - ES.Size += Size; - - } else { // The symbol must go to some kind of data section - GblSym->SectionIdx = ES.SectionIdx; - - // GblSym->Value should contain the symbol offset inside the section, - // and all symbols should start on their required alignment boundary - ES.Align = std::max(ES.Align, Align); - ES.emitAlignment(Align); - GblSym->Value = ES.size(); - - // Emit the global to the data section 'ES' - EmitGlobalConstant(GVar->getInitializer(), ES); - } - } - - AddToSymbolList(GblSym); -} - -void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, - ELFSection &GblS) { - - // Print the fields in successive locations. Pad to align if needed! - const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CVS->getType()); - const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType()); - uint64_t sizeSoFar = 0; - for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) { - const Constant* field = CVS->getOperand(i); - - // Check if padding is needed and insert one or more 0s. - uint64_t fieldSize = TD->getTypeAllocSize(field->getType()); - uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1)) - - cvsLayout->getElementOffset(i)) - fieldSize; - sizeSoFar += fieldSize + padSize; - - // Now print the actual field value. - EmitGlobalConstant(field, GblS); - - // Insert padding - this may include padding to increase the size of the - // current field up to the ABI size (if the struct is not packed) as well - // as padding to ensure that the next field starts at the right offset. 
- GblS.emitZeros(padSize); - } - assert(sizeSoFar == cvsLayout->getSizeInBytes() && - "Layout of constant struct may be incorrect!"); -} - -void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { - const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CV->getType()); - - if (const ConstantArray *CVA = dyn_cast(CV)) { - for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) - EmitGlobalConstant(CVA->getOperand(i), GblS); - return; - } else if (isa(CV)) { - GblS.emitZeros(Size); - return; - } else if (const ConstantStruct *CVS = dyn_cast(CV)) { - EmitGlobalConstantStruct(CVS, GblS); - return; - } else if (const ConstantFP *CFP = dyn_cast(CV)) { - APInt Val = CFP->getValueAPF().bitcastToAPInt(); - if (CFP->getType()->isDoubleTy()) - GblS.emitWord64(Val.getZExtValue()); - else if (CFP->getType()->isFloatTy()) - GblS.emitWord32(Val.getZExtValue()); - else if (CFP->getType()->isX86_FP80Ty()) { - unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- - TD->getTypeStoreSize(CFP->getType()); - GblS.emitWordFP80(Val.getRawData(), PadSize); - } else if (CFP->getType()->isPPC_FP128Ty()) - llvm_unreachable("PPC_FP128Ty global emission not implemented"); - return; - } else if (const ConstantInt *CI = dyn_cast(CV)) { - if (Size == 1) - GblS.emitByte(CI->getZExtValue()); - else if (Size == 2) - GblS.emitWord16(CI->getZExtValue()); - else if (Size == 4) - GblS.emitWord32(CI->getZExtValue()); - else - EmitGlobalConstantLargeInt(CI, GblS); - return; - } else if (const ConstantVector *CP = dyn_cast(CV)) { - VectorType *PTy = CP->getType(); - for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) - EmitGlobalConstant(CP->getOperand(I), GblS); - return; - } else if (const ConstantExpr *CE = dyn_cast(CV)) { - // Resolve a constant expression which returns a (Constant, Offset) - // pair. If 'Res.first' is a GlobalValue, emit a relocation with - // the offset 'Res.second', otherwise emit a global constant like - // it is always done for not contant expression types. - CstExprResTy Res = ResolveConstantExpr(CE); - const Constant *Op = Res.first; - - if (isa(Op)) - EmitGlobalDataRelocation(cast(Op), - TD->getTypeAllocSize(Op->getType()), - GblS, Res.second); - else - EmitGlobalConstant(Op, GblS); - - return; - } else if (CV->getType()->getTypeID() == Type::PointerTyID) { - // Fill the data entry with zeros or emit a relocation entry - if (isa(CV)) - GblS.emitZeros(Size); - else - EmitGlobalDataRelocation(cast(CV), - Size, GblS); - return; - } else if (const GlobalValue *GV = dyn_cast(CV)) { - // This is a constant address for a global variable or function and - // therefore must be referenced using a relocation entry. - EmitGlobalDataRelocation(GV, Size, GblS); - return; - } - - std::string msg; - raw_string_ostream ErrorMsg(msg); - ErrorMsg << "Constant unimp for type: " << *CV->getType(); - report_fatal_error(ErrorMsg.str()); -} - -// ResolveConstantExpr - Resolve the constant expression until it stop -// yielding other constant expressions. 
-CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { - const TargetData *TD = TM.getTargetData(); - - // There ins't constant expression inside others anymore - if (!isa(CV)) - return std::make_pair(CV, 0); - - const ConstantExpr *CE = dyn_cast(CV); - switch (CE->getOpcode()) { - case Instruction::BitCast: - return ResolveConstantExpr(CE->getOperand(0)); - - case Instruction::GetElementPtr: { - const Constant *ptrVal = CE->getOperand(0); - SmallVector idxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec); - return std::make_pair(ptrVal, Offset); - } - case Instruction::IntToPtr: { - Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), - false/*ZExt*/); - return ResolveConstantExpr(Op); - } - case Instruction::PtrToInt: { - Constant *Op = CE->getOperand(0); - Type *Ty = CE->getType(); - - // We can emit the pointer value into this slot if the slot is an - // integer slot greater or equal to the size of the pointer. - if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) - return ResolveConstantExpr(Op); - - llvm_unreachable("Integer size less then pointer size"); - } - case Instruction::Add: - case Instruction::Sub: { - // Only handle cases where there's a constant expression with GlobalValue - // as first operand and ConstantInt as second, which are the cases we can - // solve direclty using a relocation entry. GlobalValue=Op0, CstInt=Op1 - // 1) Instruction::Add => (global) + CstInt - // 2) Instruction::Sub => (global) + -CstInt - const Constant *Op0 = CE->getOperand(0); - const Constant *Op1 = CE->getOperand(1); - assert(isa(Op1) && "Op1 must be a ConstantInt"); - - CstExprResTy Res = ResolveConstantExpr(Op0); - assert(isa(Res.first) && "Op0 must be a GlobalValue"); - - const APInt &RHS = cast(Op1)->getValue(); - switch (CE->getOpcode()) { - case Instruction::Add: - return std::make_pair(Res.first, RHS.getSExtValue()); - case Instruction::Sub: - return std::make_pair(Res.first, (-RHS).getSExtValue()); - } - } - } - - report_fatal_error(CE->getOpcodeName() + - StringRef(": Unsupported ConstantExpr type")); - - return std::make_pair(CV, 0); // silence warning -} - -void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, - ELFSection &GblS, int64_t Offset) { - // Create the relocation entry for the global value - MachineRelocation MR = - MachineRelocation::getGV(GblS.getCurrentPCOffset(), - TEW->getAbsoluteLabelMachineRelTy(), - const_cast(GV), - Offset); - - // Fill the data entry with zeros - GblS.emitZeros(Size); - - // Add the relocation entry for the current data section - GblS.addRelocation(MR); -} - -void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI, - ELFSection &S) { - const TargetData *TD = TM.getTargetData(); - unsigned BitWidth = CI->getBitWidth(); - assert(isPowerOf2_32(BitWidth) && - "Non-power-of-2-sized integers not handled!"); - - const uint64_t *RawData = CI->getValue().getRawData(); - uint64_t Val = 0; - for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i]; - S.emitWord64(Val); - } -} - -/// EmitSpecialLLVMGlobal - Check to see if the specified global is a -/// special global used by LLVM. If so, emit it and return true, otherwise -/// do nothing and return false. 
-bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { - if (GV->getName() == "llvm.used") - llvm_unreachable("not implemented yet"); - - // Ignore debug and non-emitted data. This handles llvm.compiler.used. - if (GV->getSection() == "llvm.metadata" || - GV->hasAvailableExternallyLinkage()) - return true; - - if (!GV->hasAppendingLinkage()) return false; - - assert(GV->hasInitializer() && "Not a special LLVM global!"); - - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPointerPrefAlignment(); - if (GV->getName() == "llvm.global_ctors") { - ELFSection &Ctor = getCtorSection(); - Ctor.emitAlignment(Align); - EmitXXStructorList(GV->getInitializer(), Ctor); - return true; - } - - if (GV->getName() == "llvm.global_dtors") { - ELFSection &Dtor = getDtorSection(); - Dtor.emitAlignment(Align); - EmitXXStructorList(GV->getInitializer(), Dtor); - return true; - } - - return false; -} - -/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the -/// function pointers, ignoring the init priority. -void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) { - // Should be an array of '{ i32, void ()* }' structs. The first value is the - // init priority, which we ignore. - if (List->isNullValue()) return; - const ConstantArray *InitList = cast(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (InitList->getOperand(i)->isNullValue()) - continue; - ConstantStruct *CS = cast(InitList->getOperand(i)); - - if (CS->getOperand(1)->isNullValue()) - continue; - - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1), Xtor); - } -} - -bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the ElfCE object above. - return false; -} - -/// doFinalization - Now that the module has been completely processed, emit -/// the ELF file to 'O'. -bool ELFWriter::doFinalization(Module &M) { - // Emit .data section placeholder - getDataSection(); - - // Emit .bss section placeholder - getBSSSection(); - - // Build and emit data, bss and "common" sections. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobal(I); - - // Emit all pending globals - for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); - I != E; ++I) - EmitGlobal(*I); - - // Emit all pending externals - for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); - I != E; ++I) - SymbolList.push_back(ELFSym::getExtSym(*I)); - - // Emit a symbol for each section created until now, skip null section - for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { - ELFSection &ES = *SectionList[i]; - ELFSym *SectionSym = ELFSym::getSectionSym(); - SectionSym->SectionIdx = ES.SectionIdx; - SymbolList.push_back(SectionSym); - ES.Sym = SymbolList.back(); - } - - // Emit string table - EmitStringTable(M.getModuleIdentifier()); - - // Emit the symbol table now, if non-empty. - EmitSymbolTable(); - - // Emit the relocation sections. - EmitRelocations(); - - // Emit the sections string table. - EmitSectionTableStringTable(); - - // Dump the sections and section table to the .o file. 
- OutputSectionsAndSectionTable(); - - return false; -} - -// RelocateField - Patch relocatable field with 'Offset' in 'BO' -// using a 'Value' of known 'Size' -void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset, - int64_t Value, unsigned Size) { - if (Size == 32) - BO.fixWord32(Value, Offset); - else if (Size == 64) - BO.fixWord64(Value, Offset); - else - llvm_unreachable("don't know howto patch relocatable field"); -} - -/// EmitRelocations - Emit relocations -void ELFWriter::EmitRelocations() { - - // True if the target uses the relocation entry to hold the addend, - // otherwise the addend is written directly to the relocatable field. - bool HasRelA = TEW->hasRelocationAddend(); - - // Create Relocation sections for each section which needs it. - for (unsigned i=0, e=SectionList.size(); i != e; ++i) { - ELFSection &S = *SectionList[i]; - - // This section does not have relocations - if (!S.hasRelocations()) continue; - ELFSection &RelSec = getRelocSection(S); - - // 'Link' - Section hdr idx of the associated symbol table - // 'Info' - Section hdr idx of the section to which the relocation applies - ELFSection &SymTab = getSymbolTableSection(); - RelSec.Link = SymTab.SectionIdx; - RelSec.Info = S.SectionIdx; - RelSec.EntSize = TEW->getRelocationEntrySize(); - - // Get the relocations from Section - std::vector Relos = S.getRelocations(); - for (std::vector::iterator MRI = Relos.begin(), - MRE = Relos.end(); MRI != MRE; ++MRI) { - MachineRelocation &MR = *MRI; - - // Relocatable field offset from the section start - unsigned RelOffset = MR.getMachineCodeOffset(); - - // Symbol index in the symbol table - unsigned SymIdx = 0; - - // Target specific relocation field type and size - unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); - unsigned RelTySize = TEW->getRelocationTySize(RelType); - int64_t Addend = 0; - - // There are several machine relocations types, and each one of - // them needs a different approach to retrieve the symbol table index. - if (MR.isGlobalValue()) { - const GlobalValue *G = MR.getGlobalValue(); - int64_t GlobalOffset = MR.getConstantVal(); - SymIdx = GblSymLookup[G]; - if (G->hasPrivateLinkage()) { - // If the target uses a section offset in the relocation: - // SymIdx + Addend = section sym for global + section offset - unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx; - Addend = PrivateSyms[SymIdx]->Value + GlobalOffset; - SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); - } else { - Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset); - } - } else if (MR.isExternalSymbol()) { - const char *ExtSym = MR.getExternalSymbol(); - SymIdx = ExtSymLookup[ExtSym]; - Addend = TEW->getDefaultAddendForRelTy(RelType); - } else { - // Get the symbol index for the section symbol - unsigned SectionIdx = MR.getConstantVal(); - SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); - - // The symbol offset inside the section - int64_t SymOffset = (int64_t)MR.getResultPointer(); - - // For pc relative relocations where symbols are defined in the same - // section they are referenced, ignore the relocation entry and patch - // the relocatable field with the symbol offset directly. 
- if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { - int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); - RelocateField(S, RelOffset, Value, RelTySize); - continue; - } - - Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); - } - - // The target without addend on the relocation symbol must be - // patched in the relocation place itself to contain the addend - // otherwise write zeros to make sure there is no garbage there - RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize); - - // Get the relocation entry and emit to the relocation section - ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); - EmitRelocation(RelSec, Rel, HasRelA); - } - } -} - -/// EmitRelocation - Write relocation 'Rel' to the relocation section 'Rel' -void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, - bool HasRelA) { - RelSec.emitWord(Rel.getOffset()); - RelSec.emitWord(Rel.getInfo(is64Bit)); - if (HasRelA) - RelSec.emitWord(Rel.getAddend()); -} - -/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable' -void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { - if (is64Bit) { - SymbolTable.emitWord32(Sym.NameIdx); - SymbolTable.emitByte(Sym.Info); - SymbolTable.emitByte(Sym.Other); - SymbolTable.emitWord16(Sym.SectionIdx); - SymbolTable.emitWord64(Sym.Value); - SymbolTable.emitWord64(Sym.Size); - } else { - SymbolTable.emitWord32(Sym.NameIdx); - SymbolTable.emitWord32(Sym.Value); - SymbolTable.emitWord32(Sym.Size); - SymbolTable.emitByte(Sym.Info); - SymbolTable.emitByte(Sym.Other); - SymbolTable.emitWord16(Sym.SectionIdx); - } -} - -/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' -/// Section Header Table -void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, - const ELFSection &SHdr) { - SHdrTab.emitWord32(SHdr.NameIdx); - SHdrTab.emitWord32(SHdr.Type); - if (is64Bit) { - SHdrTab.emitWord64(SHdr.Flags); - SHdrTab.emitWord(SHdr.Addr); - SHdrTab.emitWord(SHdr.Offset); - SHdrTab.emitWord64(SHdr.Size); - SHdrTab.emitWord32(SHdr.Link); - SHdrTab.emitWord32(SHdr.Info); - SHdrTab.emitWord64(SHdr.Align); - SHdrTab.emitWord64(SHdr.EntSize); - } else { - SHdrTab.emitWord32(SHdr.Flags); - SHdrTab.emitWord(SHdr.Addr); - SHdrTab.emitWord(SHdr.Offset); - SHdrTab.emitWord32(SHdr.Size); - SHdrTab.emitWord32(SHdr.Link); - SHdrTab.emitWord32(SHdr.Info); - SHdrTab.emitWord32(SHdr.Align); - SHdrTab.emitWord32(SHdr.EntSize); - } -} - -/// EmitStringTable - If the current symbol table is non-empty, emit the string -/// table for it -void ELFWriter::EmitStringTable(const std::string &ModuleName) { - if (!SymbolList.size()) return; // Empty symbol table. - ELFSection &StrTab = getStringTableSection(); - - // Set the zero'th symbol to a null byte, as required. - StrTab.emitByte(0); - - // Walk on the symbol list and write symbol names into the string table. - unsigned Index = 1; - for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { - ELFSym &Sym = *(*I); - - std::string Name; - if (Sym.isGlobalValue()) { - SmallString<40> NameStr; - Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false); - Name.append(NameStr.begin(), NameStr.end()); - } else if (Sym.isExternalSym()) - Name.append(Sym.getExternalSymbol()); - else if (Sym.isFileType()) - Name.append(ModuleName); - - if (Name.empty()) { - Sym.NameIdx = 0; - } else { - Sym.NameIdx = Index; - StrTab.emitString(Name); - - // Keep track of the number of bytes emitted to this section. 
- Index += Name.size()+1; - } - } - assert(Index == StrTab.size()); - StrTab.Size = Index; -} - -// SortSymbols - On the symbol table local symbols must come before -// all other symbols with non-local bindings. The return value is -// the position of the first non local symbol. -unsigned ELFWriter::SortSymbols() { - unsigned FirstNonLocalSymbol; - std::vector LocalSyms, OtherSyms; - - for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { - if ((*I)->isLocalBind()) - LocalSyms.push_back(*I); - else - OtherSyms.push_back(*I); - } - SymbolList.clear(); - FirstNonLocalSymbol = LocalSyms.size(); - - for (unsigned i = 0; i < FirstNonLocalSymbol; ++i) - SymbolList.push_back(LocalSyms[i]); - - for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I) - SymbolList.push_back(*I); - - LocalSyms.clear(); - OtherSyms.clear(); - - return FirstNonLocalSymbol; -} - -/// EmitSymbolTable - Emit the symbol table itself. -void ELFWriter::EmitSymbolTable() { - if (!SymbolList.size()) return; // Empty symbol table. - - // Now that we have emitted the string table and know the offset into the - // string table of each symbol, emit the symbol table itself. - ELFSection &SymTab = getSymbolTableSection(); - SymTab.Align = TEW->getPrefELFAlignment(); - - // Section Index of .strtab. - SymTab.Link = getStringTableSection().SectionIdx; - - // Size of each symtab entry. - SymTab.EntSize = TEW->getSymTabEntrySize(); - - // Reorder the symbol table with local symbols first! - unsigned FirstNonLocalSymbol = SortSymbols(); - - // Emit all the symbols to the symbol table. - for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) { - ELFSym &Sym = *SymbolList[i]; - - // Emit symbol to the symbol table - EmitSymbol(SymTab, Sym); - - // Record the symbol table index for each symbol - if (Sym.isGlobalValue()) - GblSymLookup[Sym.getGlobalValue()] = i; - else if (Sym.isExternalSym()) - ExtSymLookup[Sym.getExternalSymbol()] = i; - - // Keep track on the symbol index into the symbol table - Sym.SymTabIdx = i; - } - - // One greater than the symbol table index of the last local symbol - SymTab.Info = FirstNonLocalSymbol; - SymTab.Size = SymTab.size(); -} - -/// EmitSectionTableStringTable - This method adds and emits a section for the -/// ELF Section Table string table: the string table that holds all of the -/// section names. -void ELFWriter::EmitSectionTableStringTable() { - // First step: add the section for the string table to the list of sections: - ELFSection &SHStrTab = getSectionHeaderStringTableSection(); - - // Now that we know which section number is the .shstrtab section, update the - // e_shstrndx entry in the ELF header. - ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); - - // Set the NameIdx of each section in the string table and emit the bytes for - // the string table. - unsigned Index = 0; - - for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { - ELFSection &S = *(*I); - // Set the index into the table. Note if we have lots of entries with - // common suffixes, we could memoize them here if we cared. - S.NameIdx = Index; - SHStrTab.emitString(S.getName()); - - // Keep track of the number of bytes emitted to this section. - Index += S.getName().size()+1; - } - - // Set the size of .shstrtab now that we know what it is. 
- assert(Index == SHStrTab.size()); - SHStrTab.Size = Index; -} - -/// OutputSectionsAndSectionTable - Now that we have constructed the file header -/// and all of the sections, emit these to the ostream destination and emit the -/// SectionTable. -void ELFWriter::OutputSectionsAndSectionTable() { - // Pass #1: Compute the file offset for each section. - size_t FileOff = ElfHdr.size(); // File header first. - - // Adjust alignment of all section if needed, skip the null section. - for (unsigned i=1, e=SectionList.size(); i < e; ++i) { - ELFSection &ES = *SectionList[i]; - if (!ES.size()) { - ES.Offset = FileOff; - continue; - } - - // Update Section size - if (!ES.Size) - ES.Size = ES.size(); - - // Align FileOff to whatever the alignment restrictions of the section are. - if (ES.Align) - FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); - - ES.Offset = FileOff; - FileOff += ES.Size; - } - - // Align Section Header. - unsigned TableAlign = TEW->getPrefELFAlignment(); - FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); - - // Now that we know where all of the sections will be emitted, set the e_shnum - // entry in the ELF header. - ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset); - - // Now that we know the offset in the file of the section table, update the - // e_shoff address in the ELF header. - ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset); - - // Now that we know all of the data in the file header, emit it and all of the - // sections! - O.write((char *)&ElfHdr.getData()[0], ElfHdr.size()); - FileOff = ElfHdr.size(); - - // Section Header Table blob - BinaryObject SHdrTable(isLittleEndian, is64Bit); - - // Emit all of sections to the file and build the section header table. - for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { - ELFSection &S = *(*I); - DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() - << ", Size: " << S.Size << ", Offset: " << S.Offset - << ", SectionData Size: " << S.size() << "\n"); - - // Align FileOff to whatever the alignment restrictions of the section are. - if (S.size()) { - if (S.Align) { - for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - } - O.write((char *)&S.getData()[0], S.Size); - FileOff += S.Size; - } - - EmitSectionHeader(SHdrTable, S); - } - - // Align output for the section table. - for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - - // Emit the section table itself. - O.write((char *)&SHdrTable.getData()[0], SHdrTable.size()); -} diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.h b/contrib/llvm/lib/CodeGen/ELFWriter.h deleted file mode 100644 index 6f7fbace8aba..000000000000 --- a/contrib/llvm/lib/CodeGen/ELFWriter.h +++ /dev/null @@ -1,251 +0,0 @@ -//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the ELFWriter class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ELFWRITER_H -#define ELFWRITER_H - -#include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include - -namespace llvm { - class BinaryObject; - class Constant; - class ConstantInt; - class ConstantStruct; - class ELFCodeEmitter; - class ELFRelocation; - class ELFSection; - struct ELFSym; - class GlobalVariable; - class JITDebugRegisterer; - class Mangler; - class MachineCodeEmitter; - class MachineConstantPoolEntry; - class ObjectCodeEmitter; - class MCAsmInfo; - class TargetELFWriterInfo; - class TargetLoweringObjectFile; - class raw_ostream; - class SectionKind; - class MCContext; - class TargetMachine; - - typedef std::vector::iterator ELFSymIter; - typedef std::vector::iterator ELFSectionIter; - typedef SetVector::const_iterator PendingGblsIter; - typedef SetVector::const_iterator PendingExtsIter; - typedef std::pair CstExprResTy; - - /// ELFWriter - This class implements the common target-independent code for - /// writing ELF files. Targets should derive a class from this to - /// parameterize the output format. - /// - class ELFWriter : public MachineFunctionPass { - friend class ELFCodeEmitter; - friend class JITDebugRegisterer; - public: - static char ID; - - /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter - ObjectCodeEmitter *getObjectCodeEmitter() { - return reinterpret_cast(ElfCE); - } - - ELFWriter(raw_ostream &O, TargetMachine &TM); - ~ELFWriter(); - - protected: - /// Output stream to send the resultant object file to. - raw_ostream &O; - - /// Target machine description. - TargetMachine &TM; - - /// Context object for machine code objects. - MCContext &OutContext; - - /// Target Elf Writer description. - const TargetELFWriterInfo *TEW; - - /// Mang - The object used to perform name mangling for this module. - Mangler *Mang; - - /// MCE - The MachineCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - ELFCodeEmitter *ElfCE; - - /// TLOF - Target Lowering Object File, provide section names for globals - /// and other object file specific stuff - const TargetLoweringObjectFile &TLOF; - - /// MAI - Target Asm Info, provide information about section names for - /// globals and other target specific stuff. - const MCAsmInfo *MAI; - - //===------------------------------------------------------------------===// - // Properties inferred automatically from the target machine. - //===------------------------------------------------------------------===// - - /// is64Bit/isLittleEndian - This information is inferred from the target - /// machine directly, indicating whether to emit a 32- or 64-bit ELF file. - bool is64Bit, isLittleEndian; - - /// doInitialization - Emit the file header and all of the global variables - /// for the module to the ELF file. - bool doInitialization(Module &M); - bool runOnMachineFunction(MachineFunction &MF); - - /// doFinalization - Now that the module has been completely processed, emit - /// the ELF file to 'O'. - bool doFinalization(Module &M); - - private: - /// Blob containing the Elf header - BinaryObject ElfHdr; - - /// SectionList - This is the list of sections that we have emitted to the - /// file. Once the file has been completely built, the section header table - /// is constructed from this info. 
- std::vector SectionList; - unsigned NumSections; // Always = SectionList.size() - - /// SectionLookup - This is a mapping from section name to section number in - /// the SectionList. Used to quickly gather the Section Index from MAI names - std::map SectionLookup; - - /// PendingGlobals - Globals not processed as symbols yet. - SetVector PendingGlobals; - - /// GblSymLookup - This is a mapping from global value to a symbol index - /// in the symbol table or private symbols list. This is useful since reloc - /// symbol references must be quickly mapped to their indices on the lists. - std::map GblSymLookup; - - /// PendingExternals - Externals not processed as symbols yet. - SetVector PendingExternals; - - /// ExtSymLookup - This is a mapping from externals to a symbol index - /// in the symbol table list. This is useful since reloc symbol references - /// must be quickly mapped to their symbol table indices. - std::map ExtSymLookup; - - /// SymbolList - This is the list of symbols emitted to the symbol table. - /// When the SymbolList is finally built, local symbols must be placed in - /// the beginning while non-locals at the end. - std::vector SymbolList; - - /// PrivateSyms - Record private symbols, every symbol here must never be - /// present in the SymbolList. - std::vector PrivateSyms; - - /// getSection - Return the section with the specified name, creating a new - /// section if one does not already exist. - ELFSection &getSection(const std::string &Name, unsigned Type, - unsigned Flags = 0, unsigned Align = 0) { - ELFSection *&SN = SectionLookup[Name]; - if (SN) return *SN; - - SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); - SN = SectionList.back(); - SN->SectionIdx = NumSections++; - SN->Type = Type; - SN->Flags = Flags; - SN->Link = ELF::SHN_UNDEF; - SN->Align = Align; - return *SN; - } - - ELFSection &getNonExecStackSection() { - return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1); - } - - ELFSection &getSymbolTableSection() { - return getSection(".symtab", ELF::SHT_SYMTAB, 0); - } - - ELFSection &getStringTableSection() { - return getSection(".strtab", ELF::SHT_STRTAB, 0, 1); - } - - ELFSection &getSectionHeaderStringTableSection() { - return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1); - } - - ELFSection &getNullSection() { - return getSection("", ELF::SHT_NULL, 0); - } - - ELFSection &getDataSection(); - ELFSection &getBSSSection(); - ELFSection &getCtorSection(); - ELFSection &getDtorSection(); - ELFSection &getJumpTableSection(); - ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); - ELFSection &getTextSection(const Function *F); - ELFSection &getRelocSection(ELFSection &S); - - // Helpers for obtaining ELF specific info. - unsigned getGlobalELFBinding(const GlobalValue *GV); - unsigned getGlobalELFType(const GlobalValue *GV); - unsigned getGlobalELFVisibility(const GlobalValue *GV); - - // AddPendingGlobalSymbol - Add a global to be processed and to - // the global symbol lookup, use a zero index because the table - // index will be determined later. - void AddPendingGlobalSymbol(const GlobalValue *GV, - bool AddToLookup = false); - - // AddPendingExternalSymbol - Add the external to be processed - // and to the external symbol lookup, use a zero index because - // the symbol table index will be determined later. - void AddPendingExternalSymbol(const char *External); - - // AddToSymbolList - Update the symbol lookup and If the symbol is - // private add it to PrivateSyms list, otherwise to SymbolList. 
- void AddToSymbolList(ELFSym *GblSym); - - // As we complete the ELF file, we need to update fields in the ELF header - // (e.g. the location of the section table). These members keep track of - // the offset in ELFHeader of these various pieces to update and other - // locations in the file. - unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header. - unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header. - unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. - - private: - void EmitGlobal(const GlobalValue *GV); - void EmitGlobalConstant(const Constant *C, ELFSection &GblS); - void EmitGlobalConstantStruct(const ConstantStruct *CVS, - ELFSection &GblS); - void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S); - void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, - ELFSection &GblS, int64_t Offset = 0); - bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - void EmitXXStructorList(const Constant *List, ELFSection &Xtor); - void EmitRelocations(); - void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); - void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); - void EmitSectionTableStringTable(); - void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); - void EmitSymbolTable(); - void EmitStringTable(const std::string &ModuleName); - void OutputSectionsAndSectionTable(); - void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value, - unsigned Size); - unsigned SortSymbols(); - CstExprResTy ResolveConstantExpr(const Constant *CV); - }; -} - -#endif diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp index a7aba89b87f3..3bb04657b58a 100644 --- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp @@ -77,7 +77,7 @@ void EdgeBundles::view() const { /// Specialize WriteGraph, the standard implementation won't work. raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, bool ShortNames, - const std::string &Title) { + const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 01dccdb71e4b..a48c5400abcb 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -45,7 +45,7 @@ using namespace llvm; /// DomainValue for each register, but it may contain multiple execution /// domains. A register value is initially created in a single execution /// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact the the register is now available in multiple +/// keep track of the fact that the register is now available in multiple /// domains. namespace { struct DomainValue { @@ -57,8 +57,10 @@ struct DomainValue { // domains where the register is available for free. unsigned AvailableDomains; - // Position of the last defining instruction. - unsigned Dist; + // Pointer to the next DomainValue in a chain. When two DomainValues are + // merged, Victim.Next is set to point to Victor, so old DomainValue + // references can be updated by folowing the chain. 
+ DomainValue *Next; // Twiddleable instructions using or defining these registers. SmallVector Instrs; @@ -92,15 +94,32 @@ struct DomainValue { return CountTrailingZeros_32(AvailableDomains); } - DomainValue() { clear(); } + DomainValue() : Refs(0) { clear(); } + // Clear this DomainValue and point to next which has all its data. void clear() { - Refs = AvailableDomains = Dist = 0; + AvailableDomains = 0; + Next = 0; Instrs.clear(); } }; } +namespace { +/// LiveReg - Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; +} // anonynous namespace + namespace { class ExeDepsFix : public MachineFunctionPass { static char ID; @@ -111,13 +130,19 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineBasicBlock *MBB; std::vector AliasMap; const unsigned NumRegs; - DomainValue **LiveRegs; - typedef DenseMap LiveOutMap; + LiveReg *LiveRegs; + typedef DenseMap LiveOutMap; LiveOutMap LiveOuts; - unsigned Distance; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// True when the current block has a predecessor that hasn't been visited + /// yet. + bool SeenUnknownBackEdge; public: ExeDepsFix(const TargetRegisterClass *rc) @@ -131,26 +156,33 @@ class ExeDepsFix : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "SSE execution domain fixup"; + return "Execution dependency fix"; } private: // Register mapping. - int RegIndex(unsigned Reg); + int regIndex(unsigned Reg); // DomainValue allocation. - DomainValue *Alloc(int domain = -1); - void Recycle(DomainValue*); + DomainValue *alloc(int domain = -1); + DomainValue *retain(DomainValue *DV) { + if (DV) ++DV->Refs; + return DV; + } + void release(DomainValue*); + DomainValue *resolve(DomainValue*&); // LiveRegs manipulations. - void SetLiveReg(int rx, DomainValue *DV); - void Kill(int rx); - void Force(int rx, unsigned domain); - void Collapse(DomainValue *dv, unsigned domain); - bool Merge(DomainValue *A, DomainValue *B); + void setLiveReg(int rx, DomainValue *DV); + void kill(int rx); + void force(int rx, unsigned domain); + void collapse(DomainValue *dv, unsigned domain); + bool merge(DomainValue *A, DomainValue *B); - void enterBasicBlock(); - void visitGenericInstr(MachineInstr*); + void enterBasicBlock(MachineBasicBlock*); + void leaveBasicBlock(MachineBasicBlock*); + void visitInstr(MachineInstr*); + void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); }; @@ -160,83 +192,108 @@ char ExeDepsFix::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. 
-int ExeDepsFix::RegIndex(unsigned Reg) { +int ExeDepsFix::regIndex(unsigned Reg) { assert(Reg < AliasMap.size() && "Invalid register"); return AliasMap[Reg]; } -DomainValue *ExeDepsFix::Alloc(int domain) { +DomainValue *ExeDepsFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); - dv->Dist = Distance; if (domain >= 0) dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); return dv; } -void ExeDepsFix::Recycle(DomainValue *dv) { - assert(dv && "Cannot recycle NULL"); - dv->clear(); - Avail.push_back(dv); +/// release - Release a reference to DV. When the last reference is released, +/// collapse if needed. +void ExeDepsFix::release(DomainValue *DV) { + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} + +/// resolve - Follow the chain of dead DomainValues until a live DomainValue is +/// reached. Update the referenced pointer when necessary. +DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. + retain(DV); + release(DVRef); + DVRef = DV; + return DV; } /// Set LiveRegs[rx] = dv, updating reference counts. -void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { +void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs) { - LiveRegs = new DomainValue*[NumRegs]; - std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); - } + assert(LiveRegs && "Must enter basic block first."); - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) return; - if (LiveRegs[rx]) { - assert(LiveRegs[rx]->Refs && "Bad refcount"); - if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); - } - LiveRegs[rx] = dv; - if (dv) ++dv->Refs; + if (LiveRegs[rx].Value) + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = retain(dv); } // Kill register rx, recycle or collapse any DomainValue. -void ExeDepsFix::Kill(int rx) { +void ExeDepsFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs || !LiveRegs[rx]) return; + assert(LiveRegs && "Must enter basic block first."); + if (!LiveRegs[rx].Value) + return; - // Before killing the last reference to an open DomainValue, collapse it to - // the first available domain. - if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) - Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); - else - SetLiveReg(rx, 0); + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = 0; } /// Force register rx into domain. -void ExeDepsFix::Force(int rx, unsigned domain) { +void ExeDepsFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); - DomainValue *dv; - if (LiveRegs && (dv = LiveRegs[rx])) { + assert(LiveRegs && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx].Value) { if (dv->isCollapsed()) dv->addDomain(domain); else if (dv->hasDomain(domain)) - Collapse(dv, domain); + collapse(dv, domain); else { // This is an incompatible open DomainValue. 
Collapse it to whatever and // force the new value into domain. This costs a domain crossing. - Collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx] && "Not live after collapse?"); - LiveRegs[rx]->addDomain(domain); + collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx].Value && "Not live after collapse?"); + LiveRegs[rx].Value->addDomain(domain); } } else { // Set up basic collapsed DomainValue. - SetLiveReg(rx, Alloc(domain)); + setLiveReg(rx, alloc(domain)); } } /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. -void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { +void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { assert(dv->hasDomain(domain) && "Cannot collapse"); // Collapse all the instructions. @@ -247,13 +304,13 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { // If there are multiple users, give them new, unique DomainValues. if (LiveRegs && dv->Refs > 1) for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == dv) - SetLiveReg(rx, Alloc(domain)); + if (LiveRegs[rx].Value == dv) + setLiveReg(rx, alloc(domain)); } /// Merge - All instructions and registers in B are moved to A, and B is /// released. -bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { +bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); if (A == B) @@ -263,47 +320,188 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { if (!common) return false; A->AvailableDomains = common; - A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->clear(); + // All uses of B are referred to A. + B->Next = retain(A); + for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == B) - SetLiveReg(rx, A); + if (LiveRegs[rx].Value == B) + setLiveReg(rx, A); return true; } -void ExeDepsFix::enterBasicBlock() { - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), +// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { + // Detect back-edges from predecessors we haven't processed yet. + SeenUnknownBackEdge = false; + + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. + for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = 0; + LiveRegs[rx].Def = -(1 << 20); + } + + // This is the entry block. + if (MBB->pred_empty()) { + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = RegIndex(*i); - if (rx < 0) continue; - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) continue; - DomainValue *pdv = fi->second[rx]; - if (!pdv) continue; - if (!LiveRegs || !LiveRegs[rx]) { - SetLiveReg(rx, pdv); + int rx = regIndex(*i); + if (rx < 0) + continue; + // Treat function live-ins as if they were defined just before the first + // instruction. 
Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) + continue; + if (!LiveRegs[rx].Value) { + setLiveReg(rx, pdv); continue; } // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx]->isCollapsed()) { + if (LiveRegs[rx].Value->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - unsigned domain = LiveRegs[rx]->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - Collapse(pdv, domain); + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); continue; } // Currently open, merge in predecessor. if (!pdv->isCollapsed()) - Merge(LiveRegs[rx], pdv); + merge(LiveRegs[rx].Value, pdv); else - Force(rx, pdv->getFirstDomain()); + force(rx, pdv->getFirstDomain()); } } + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n")); +} + +void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); + // Save live registers at end of MBB - used by enterBasicBlock(). + // Also use LiveOuts as a visited set to detect back-edges. + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i].Value); + delete[] LiveRegs; + } + LiveRegs = 0; +} + +void ExeDepsFix::visitInstr(MachineInstr *MI) { + if (MI->isDebugValue()) + return; + + // Update instructions with explicit execution domains. + std::pair DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + int rx = regIndex(MO.getReg()); + if (rx < 0) + continue; + + // This instruction explicitly defines rx. 
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; + } + + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + } + + ++CurInstr; } // A hard instruction only works in one domain. All input registers will be @@ -314,19 +512,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Force(rx, domain); + force(rx, domain); } // Kill all defs and force them. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Kill(rx); - Force(rx, domain); + kill(rx); + force(rx, domain); } } @@ -343,9 +541,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx]) { + if (DomainValue *dv = LiveRegs[rx].Value) { // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -360,7 +558,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { else // Open DomainValue is not compatible with instruction. It is useless // now. - Kill(rx); + kill(rx); } } @@ -374,94 +572,89 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. - SmallVector doms; + SmallVector Regs; for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; - DomainValue *dv = LiveRegs[rx]; + const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. - if (!dv->getCommonDomains(available)) { - Kill(*i); + if (!LR.Value->getCommonDomains(available)) { + kill(rx); continue; } - // sorted, uniqued insert. - bool inserted = false; - for (SmallVector::iterator i = doms.begin(), e = doms.end(); - i != e && !inserted; ++i) { - if (dv == *i) - inserted = true; - else if (dv->Dist < (*i)->Dist) { - inserted = true; - doms.insert(i, dv); + // Sorted insertion. 
+ bool Inserted = false; + for (SmallVector::iterator i = Regs.begin(), e = Regs.end(); + i != e && !Inserted; ++i) { + if (LR.Def < i->Def) { + Inserted = true; + Regs.insert(i, LR); } } - if (!inserted) - doms.push_back(dv); + if (!Inserted) + Regs.push_back(LR); } // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. DomainValue *dv = 0; - while (!doms.empty()) { + while (!Regs.empty()) { if (!dv) { - dv = doms.pop_back_val(); + dv = Regs.pop_back_val().Value; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } - DomainValue *latest = doms.pop_back_val(); - if (Merge(dv, latest)) continue; + DomainValue *Latest = Regs.pop_back_val().Value; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (SmallVector::iterator i=used.begin(), e=used.end(); i != e; ++i) - if (LiveRegs[*i] == latest) - Kill(*i); + if (LiveRegs[*i].Value == Latest) + kill(*i); } // dv is the DomainValue we are going to use for this instruction. - if (!dv) - dv = Alloc(); - dv->Dist = Distance; - dv->AvailableDomains = available; + if (!dv) { + dv = alloc(); + dv->AvailableDomains = available; + } dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { - Kill(rx); - SetLiveReg(rx, dv); + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { + kill(rx); + setLiveReg(rx, dv); } } } -void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any relevant registers redefined. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); - if (rx < 0) continue; - Kill(rx); - } -} - bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - MBB = 0; LiveRegs = 0; - Distance = 0; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " + << RC->getName() << " **********\n"); + // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { + if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) { anyregs = true; break; } @@ -473,43 +666,48 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // or -1. 
AliasMap.resize(TRI->getNumRegs(), -1); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) AliasMap[*AI] = i; } MachineBasicBlock *Entry = MF->begin(); - SmallPtrSet Visited; - for (df_ext_iterator > - DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { - MBB = *DFI; - enterBasicBlock(); + ReversePostOrderTraversal RPOT(Entry); + SmallVector Loops; + for (ReversePostOrderTraversal::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + MachineBasicBlock *MBB = *MBBI; + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) + Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *mi = I; - if (mi->isDebugValue()) continue; - ++Distance; - std::pair domp = TII->getExecutionDomain(mi); - if (domp.first) - if (domp.second) - visitSoftInstr(mi, domp.second); - else - visitHardInstr(mi, domp.first); - else if (LiveRegs) - visitGenericInstr(mi); - } - - // Save live registers at end of MBB - used by enterBasicBlock(). - if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); - LiveRegs = 0; + ++I) + visitInstr(I); + leaveBasicBlock(MBB); } - // Clear the LiveOuts vectors. Should we also collapse any remaining - // DomainValues? - for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); - i != e; ++i) - delete[] i->second; + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. + for (unsigned i = 0, e = Loops.size(); i != e; ++i) { + MachineBasicBlock *MBB = Loops[i]; + enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); + leaveBasicBlock(MBB); + } + + // Clear the LiveOuts vectors and collapse any remaining DomainValues. 
+ for (ReversePostOrderTraversal::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end() || !FI->second) + continue; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (FI->second[i].Value) + release(FI->second[i].Value); + delete[] FI->second; + } LiveOuts.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp index a67140ece4a5..2c4a93543cc3 100644 --- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -32,10 +32,6 @@ namespace { private: virtual bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { - return "Expand ISel Pseudo-instructions"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } @@ -43,12 +39,9 @@ namespace { } // end anonymous namespace char ExpandISelPseudos::ID = 0; +char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", - "Expand CodeGen Pseudo-instructions", false, false) - -FunctionPass *llvm::createExpandISelPseudosPass() { - return new ExpandISelPseudos(); -} + "Expand ISel Pseudo-instructions", false, false) bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -62,8 +55,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MBBI++; // If MI is a pseudo, expand it. - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.usesCustomInsertionHook()) { + if (MI->usesCustomInsertionHook()) { Changed = true; MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index e2a14a8dfd97..b14afc286d49 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -36,10 +36,6 @@ struct ExpandPostRA : public MachineFunctionPass { static char ID; // Pass identification, replacement for typeid ExpandPostRA() : MachineFunctionPass(ID) {} - const char *getPassName() const { - return "Post-RA pseudo instruction expansion pass"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); @@ -61,10 +57,10 @@ struct ExpandPostRA : public MachineFunctionPass { } // end anonymous namespace char ExpandPostRA::ID = 0; +char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; -FunctionPass *llvm::createExpandPostRAPseudosPass() { - return new ExpandPostRA(); -} +INITIALIZE_PASS(ExpandPostRA, "postrapseudos", + "Post-RA pseudo instruction expansion pass", false, false) /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, /// and the lowered replacement instructions immediately precede it. @@ -207,7 +203,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { ++mi; // Only expand pseudos. - if (!MI->getDesc().isPseudo()) + if (!MI->isPseudo()) continue; // Give targets a chance to expand even standard pseudos. 
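The ExecutionDepsFix change above replaces the depth-first walk with a reverse-post-order sweep: each block merges the saved live-out state of its already-visited predecessors, records when a predecessor has not been processed yet (a back-edge), and such blocks are queued and revisited in a second pass once every predecessor has published its live-outs; in the patch that second visit re-runs processDefs so clearances deferred with "Wait for back-edge to resolve" can be decided with complete information. Below is a minimal standalone sketch of that traversal pattern only, using a toy Block type and a single integer "age" in place of MachineBasicBlock and the per-register LiveReg tables; the names and the +1 aging rule are illustrative assumptions, not the LLVM API or the pass itself.

// Standalone sketch (not part of the patch): RPO sweep with a second visit of
// blocks that were entered before all of their predecessors were known.
#include <algorithm>
#include <cstdio>
#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Block {
  int ID;
  std::vector<Block *> Preds, Succs;
};

// Post-order DFS from the entry block; reversing the result gives an RPO.
static void postOrder(Block *B, std::unordered_set<Block *> &Seen,
                      std::vector<Block *> &Out) {
  if (!Seen.insert(B).second)
    return;
  for (Block *S : B->Succs)
    postOrder(S, Seen, Out);
  Out.push_back(B);
}

int main() {
  // Tiny CFG with a loop: 0 -> 1 -> 2 -> 1, 2 -> 3.
  std::vector<Block> Blocks(4);
  for (int i = 0; i != 4; ++i)
    Blocks[i].ID = i;
  auto link = [&](int A, int B) {
    Blocks[A].Succs.push_back(&Blocks[B]);
    Blocks[B].Preds.push_back(&Blocks[A]);
  };
  link(0, 1); link(1, 2); link(2, 1); link(2, 3);

  std::unordered_set<Block *> Seen;
  std::vector<Block *> RPO;
  postOrder(&Blocks[0], Seen, RPO);
  std::reverse(RPO.begin(), RPO.end());

  // "Live-out" stands in for the per-register Def ages saved by
  // leaveBasicBlock(); merging keeps the most recent (largest) value.
  std::unordered_map<Block *, int> LiveOut;
  std::vector<Block *> Loops; // Blocks entered with an unknown back-edge.

  auto visit = [&](Block *B, bool FirstPass) {
    int In = 0;
    bool SeenUnknownBackEdge = false;
    for (Block *P : B->Preds) {
      auto It = LiveOut.find(P);
      if (It == LiveOut.end()) { // Predecessor not processed yet.
        SeenUnknownBackEdge = true;
        continue;
      }
      In = std::max(In, It->second);
    }
    if (FirstPass && SeenUnknownBackEdge)
      Loops.push_back(B);  // Revisit once the back-edge value is known.
    LiveOut[B] = In + 1;   // Pretend the block ages everything by one.
  };

  for (Block *B : RPO)  // First pass: RPO sweep.
    visit(B, true);
  for (Block *B : Loops) // Second pass: merge back-edge live-outs.
    visit(B, false);

  for (Block *B : RPO)
    std::printf("BB#%d live-out age %d\n", B->ID, LiveOut[B]);
  return 0;
}

A single extra visit of the flagged blocks suffices here, as in the patch, because only the merged live-out values need to see the back-edge data; the traversal never iterates to a fixed point.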
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp index d757cf409d50..1caf8c233976 100644 --- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -143,12 +143,12 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - default: llvm_unreachable("Unknown GC point kind"); case GC::Loop: return "loop"; case GC::Return: return "return"; case GC::PreCall: return "pre-call"; case GC::PostCall: return "post-call"; } + llvm_unreachable("Invalid point kind"); } bool Printer::runOnFunction(Function &F) { @@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) { GCFunctionInfo *FD = &getAnalysis().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp index 766c6ee542a9..506b5cf09457 100644 --- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -10,8 +10,8 @@ // This file implements target- and collector-independent garbage collection // infrastructure. // -// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots -// are identified in SelectionDAGISel. +// GCMachineCodeAnalysis identifies the GC safe points in the machine code. +// Roots are identified in SelectionDAGISel. // //===----------------------------------------------------------------------===// @@ -35,9 +35,9 @@ using namespace llvm; namespace { - + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or - /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as /// directed by the GCStrategy. It also performs automatic root initialization /// and custom intrinsic lowering. class LowerIntrinsics : public FunctionPass { @@ -47,47 +47,46 @@ namespace { bool PerformDefaultLowering(Function &F, GCStrategy &Coll); static bool InsertRootInitializers(Function &F, AllocaInst **Roots, unsigned Count); - + public: static char ID; - + LowerIntrinsics(); const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; - + bool doInitialization(Module &M); bool runOnFunction(Function &F); }; - - - /// MachineCodeAnalysis - This is a target-independent pass over the machine + + + /// GCMachineCodeAnalysis - This is a target-independent pass over the machine /// function representation to identify safe points for the garbage collector /// in the machine code. It inserts labels at safe points and populates a /// GCMetadata record for each function. 
- class MachineCodeAnalysis : public MachineFunctionPass { + class GCMachineCodeAnalysis : public MachineFunctionPass { const TargetMachine *TM; GCFunctionInfo *FI; MachineModuleInfo *MMI; const TargetInstrInfo *TII; - + void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); - MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL) const; - + void FindStackOffsets(MachineFunction &MF); - + public: static char ID; - - MachineCodeAnalysis(); - const char *getPassName() const; + + GCMachineCodeAnalysis(); void getAnalysisUsage(AnalysisUsage &AU) const; - + bool runOnMachineFunction(MachineFunction &MF); }; - + } // ----------------------------------------------------------------------------- @@ -97,6 +96,7 @@ GCStrategy::GCStrategy() : CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), + CustomSafePoints(false), InitRoots(true), UsesMetadata(false) {} @@ -104,18 +104,24 @@ GCStrategy::GCStrategy() : GCStrategy::~GCStrategy() { for (iterator I = begin(), E = end(); I != E; ++I) delete *I; - + Functions.clear(); } - + bool GCStrategy::initializeCustomLowering(Module &M) { return false; } - + bool GCStrategy::performCustomLowering(Function &F) { dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; - llvm_unreachable(0); - return 0; + llvm_unreachable("must override performCustomLowering"); } + +bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { + dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; + llvm_unreachable(0); +} + + GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { GCFunctionInfo *FI = new GCFunctionInfo(F, *this); Functions.push_back(FI); @@ -132,7 +138,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } - + char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() @@ -143,7 +149,7 @@ LowerIntrinsics::LowerIntrinsics() const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; } - + void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { FunctionPass::getAnalysisUsage(AU); AU.addRequired(); @@ -161,22 +167,22 @@ bool LowerIntrinsics::doInitialization(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && I->hasGC()) MI->getFunctionInfo(*I); // Instantiate the GC strategy. - + bool MadeChange = false; for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) if (NeedsCustomLoweringPass(**I)) if ((*I)->initializeCustomLowering(M)) MadeChange = true; - + return MadeChange; } -bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, +bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, unsigned Count) { // Scroll past alloca instructions. BasicBlock::iterator IP = F.getEntryBlock().begin(); while (isa(IP)) ++IP; - + // Search for initializers in the initial BB. SmallPtrSet InitedRoots; for (; !CouldBecomeSafePoint(IP); ++IP) @@ -184,10 +190,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, if (AllocaInst *AI = dyn_cast(SI->getOperand(1)->stripPointerCasts())) InitedRoots.insert(AI); - + // Add root initializers. 
bool MadeChange = false; - + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast( @@ -196,7 +202,7 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, SI->insertAfter(*I); MadeChange = true; } - + return MadeChange; } @@ -220,26 +226,26 @@ bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) { bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) { // The natural definition of instructions which could introduce safe points // are: - // + // // - call, invoke (AfterCall, BeforeCall) // - phis (Loops) // - invoke, ret, unwind (Exit) - // + // // However, instructions as seemingly inoccuous as arithmetic can become // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead // it is necessary to take a conservative approach. - + if (isa(I) || isa(I) || isa(I) || isa(I)) return false; - + // llvm.gcroot is safe because it doesn't do anything at runtime. if (CallInst *CI = dyn_cast(I)) if (Function *F = CI->getCalledFunction()) if (unsigned IID = F->getIntrinsicID()) if (IID == Intrinsic::gcroot) return false; - + return true; } @@ -249,15 +255,15 @@ bool LowerIntrinsics::runOnFunction(Function &F) { // Quick exit for functions that do not use GC. if (!F.hasGC()) return false; - + GCFunctionInfo &FI = getAnalysis().getFunctionInfo(F); GCStrategy &S = FI.getStrategy(); - + bool MadeChange = false; - + if (NeedsDefaultLoweringPass(S)) MadeChange |= PerformDefaultLowering(F, S); - + bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); if (UseCustomLoweringPass) MadeChange |= S.performCustomLowering(F); @@ -275,9 +281,9 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { bool LowerWr = !S.customWriteBarrier(); bool LowerRd = !S.customReadBarrier(); bool InitRoots = S.initializeRoots(); - + SmallVector Roots; - + bool MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { @@ -313,104 +319,104 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { default: continue; } - + MadeChange = true; } } } - + if (Roots.size()) MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); - + return MadeChange; } // ----------------------------------------------------------------------------- -FunctionPass *llvm::createGCMachineCodeAnalysisPass() { - return new MachineCodeAnalysis(); -} +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; -char MachineCodeAnalysis::ID = 0; +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) -MachineCodeAnalysis::MachineCodeAnalysis() +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} -const char *MachineCodeAnalysis::getPassName() const { - return "Analyze Machine Code For Garbage Collection"; -} - -void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); AU.addRequired(); AU.addRequired(); } -MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - DebugLoc DL) const { +MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { MCSymbol *Label = 
MBB.getParent()->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; } -void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { // Find the return address (next instruction), too, so as to bracket the call // instruction. - MachineBasicBlock::iterator RAI = CI; - ++RAI; - + MachineBasicBlock::iterator RAI = CI; + ++RAI; + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); } - + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); } } -void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; ++BBI) for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); MI != ME; ++MI) - if (MI->getDesc().isCall()) + if (MI->isCall()) VisitCallPoint(MI); } -void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { const TargetFrameLowering *TFI = TM->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); - + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), RE = FI->roots_end(); RI != RE; ++RI) RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); } -bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. if (!MF.getFunction()->hasGC()) return false; - + FI = &getAnalysis().getFunctionInfo(*MF.getFunction()); if (!FI->getStrategy().needsSafePoints()) return false; - + TM = &MF.getTarget(); MMI = &getAnalysis(); TII = TM->getInstrInfo(); - + // Find the size of the stack frame. FI->setFrameSize(MF.getFrameInfo()->getStackSize()); - + // Find all safe points. - FindSafePoints(MF); - + if (FI->getStrategy().customSafePoints()) { + FI->getStrategy().findCustomSafePoints(*FI, MF); + } else { + FindSafePoints(MF); + } + // Find the stack offsets for all roots. 
FindStackOffsets(MF); - + return false; } diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index ce7ed293daac..75ae5b9c2c27 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -62,6 +62,7 @@ STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); STATISTIC(NumDupBBs, "Number of duplicated blocks"); +STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); namespace { class IfConverter : public MachineFunctionPass { @@ -169,7 +170,6 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "If Converter"; } private: bool ReverseBranchCondition(BBInfo &BBI); @@ -195,7 +195,8 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, - SmallSet &Redefs); + SmallSet &Redefs, + SmallSet *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, SmallSet &Redefs, @@ -251,12 +252,12 @@ namespace { char IfConverter::ID = 0; } +char &llvm::IfConverterID = IfConverter::ID; + INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) -FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } - bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); @@ -313,8 +314,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool RetVal = false; switch (Kind) { - default: assert(false && "Unexpected!"); - break; + default: llvm_unreachable("Unexpected!"); case ICSimple: case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; @@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, // blocks, move the end iterators up past any branch instructions. 
while (TIE != TIB) { --TIE; - if (!TIE->getDesc().isBranch()) + if (!TIE->isBranch()) break; } while (FIE != FIB) { --FIE; - if (!FIE->getDesc().isBranch()) + if (!FIE->isBranch()) break; } @@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isNotDuplicable()) + if (I->isNotDuplicable()) BBI.CannotBeCopied = true; bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch(); + bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); if (!isCondBr) { if (!isPredicated) { @@ -963,7 +962,7 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet &Redefs, E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; Redefs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Redefs.insert(*Subreg); } @@ -984,7 +983,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, Defs.push_back(Reg); else if (MO.isKill()) { Redefs.erase(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Redefs.erase(*SR); } } @@ -997,7 +996,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, true/*IsImp*/,false/*IsKill*/)); } else { Redefs.insert(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) Redefs.insert(*SR); } } @@ -1035,7 +1034,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICSimpleFalse) if (TII->ReverseBranchCondition(Cond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. @@ -1108,7 +1107,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) if (TII->ReverseBranchCondition(Cond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { if (ReverseBranchCondition(*CvtBBI)) { @@ -1155,7 +1154,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { SmallVector RevCond(CvtBBI->BrCond.begin(), CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); } @@ -1227,7 +1226,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo *BBI2 = &FalseBBI; SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) - assert(false && "Unable to reverse branch condition!"); + llvm_unreachable("Unable to reverse branch condition!"); SmallVector *Cond1 = &BBI.BrCond; SmallVector *Cond2 = &RevCond; @@ -1281,7 +1280,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); - // Predicate the 'true' block after removing its branch. + // Remove branch from 'true' block and remove duplicated instructions. 
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); DI1 = BBI1->BB->end(); for (unsigned i = 0; i != NumDups2; ) { @@ -1294,9 +1293,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, ++i; } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); - // Predicate the 'false' block. + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { @@ -1308,6 +1306,55 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, if (!DI2->isDebugValue()) --NumDups2; } + + // Remember which registers would later be defined by the false block. + // This allows us not to predicate instructions in the true block that would + // later be re-defined. That is, rather than + // subeq r0, r1, #1 + // addne r0, r1, #1 + // generate: + // sub r0, r1, #1 + // addne r0, r1, #1 + SmallSet RedefsByFalse; + SmallSet ExtUses; + if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { + for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { + if (FI->isDebugValue()) + continue; + SmallVector Defs; + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = FI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + Defs.push_back(Reg); + } else if (!RedefsByFalse.count(Reg)) { + // These are defined before ctrl flow reach the 'false' instructions. + // They cannot be modified by the 'true' instructions. + ExtUses.insert(Reg); + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + ExtUses.insert(*SR); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (!ExtUses.count(Reg)) { + RedefsByFalse.insert(Reg); + for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + RedefsByFalse.insert(*SR); + } + } + } + } + + // Predicate the 'true' block. + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + + // Predicate the 'false' block. PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. @@ -1319,7 +1366,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { - BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough; // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. @@ -1356,15 +1403,49 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return true; } +static bool MaySpeculate(const MachineInstr *MI, + SmallSet &LaterRedefs, + const TargetInstrInfo *TII) { + bool SawStore = true; + if (!MI->isSafeToMove(TII, 0, SawStore)) + return false; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef() && !LaterRedefs.count(Reg)) + return false; + } + + return true; +} + /// PredicateBlock - Predicate instructions from the start of the block to the /// specified end with the specified condition. 
void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, - SmallSet &Redefs) { + SmallSet &Redefs, + SmallSet *LaterRedefs) { + bool AnyUnpred = false; + bool MaySpec = LaterRedefs != 0; for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (I->isDebugValue() || TII->isPredicated(I)) continue; + // It may be possible not to predicate an instruction if it's the 'true' + // side of a diamond and the 'false' side may re-define the instruction's + // defs. + if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) { + AnyUnpred = true; + continue; + } + // If any instruction is predicated, then every instruction after it must + // be predicated. + MaySpec = false; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; @@ -1383,6 +1464,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI, BBI.NonPredSize = 0; ++NumIfConvBBs; + if (AnyUnpred) + ++NumUnpred; } /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to @@ -1395,9 +1478,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { - const MCInstrDesc &MCID = I->getDesc(); // Do not copy the end of the block branches. - if (IgnoreBr && MCID.isBranch()) + if (IgnoreBr && I->isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 726af4696578..d5ea666e4a17 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -14,14 +14,15 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -173,8 +174,7 @@ class InlineSpiller : public Spiller { void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); - bool foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl &Ops, + bool foldMemoryOperand(ArrayRef >, MachineInstr *LoadMI = 0); void insertReload(LiveInterval &NewLI, SlotIndex, MachineBasicBlock::iterator MI); @@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex()); + LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); assert(SrcLR && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = (SrcLR->end == VNI->def); @@ -644,16 +644,18 @@ void InlineSpiller::analyzeSiblingValues() { if (VNI->isUnused()) continue; MachineInstr *DefMI = 0; + if (!VNI->isPHIDef()) { + DefMI = LIS.getInstructionFromIndex(VNI->def); + assert(DefMI && "No defining instruction"); + } // Check possible sibling copies. 
- if (VNI->isPHIDef() || VNI->getCopy()) { + if (VNI->isPHIDef() || DefMI->isCopy()) { VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); assert(OrigVNI && "Def outside original live range"); if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } - if (!DefMI && !VNI->isPHIDef()) - DefMI = LIS.getInstructionFromIndex(VNI->def); - if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def << " may remat from " << *DefMI); } @@ -665,8 +667,8 @@ void InlineSpiller::analyzeSiblingValues() { /// a spill at a better location. bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); - VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); - assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -726,7 +728,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. LIS.InsertMachineInstrInMaps(MII); - VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); ++NumSpills; @@ -760,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Find all spills and copies of VNI. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); MachineInstr *MI = UI.skipInstruction();) { - if (!MI->isCopy() && !MI->getDesc().mayStore()) + if (!MI->isCopy() && !MI->mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); if (LI->getVNInfoAt(Idx) != VNI) @@ -770,9 +771,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { if (unsigned DstReg = isFullCopyOf(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); - VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); assert(DstVNI && "Missing defined value"); - assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); WorkList.push_back(std::make_pair(&DstLI, DstVNI)); } continue; @@ -811,7 +812,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -824,7 +825,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); - VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); } while (!WorkList.empty()); @@ -833,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt 
to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { @@ -855,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); if (SibI != SibValues.end()) RM.OrigMI = SibI->second.DefMI; - if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + if (!Edit->canRematerializeAt(RM, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; @@ -863,42 +864,37 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. - bool Reads, Writes; - SmallVector Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops); - if (Writes) { - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { - markValueUsed(&VirtReg, ParentVNI); - DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); - return false; - } - } + SmallVector, 8> Ops; + MIBundleOperands::RegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + if (RI.Tied) { + markValueUsed(&VirtReg, ParentVNI); + DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); + return false; } // Before rematerializing into a register for a single instruction, try to // fold a load into the instruction. That avoids allocating a new register. - if (RM.OrigMI->getDesc().canFoldAsLoad() && - foldMemoryOperand(MI, Ops, RM.OrigMI)) { + if (RM.OrigMI->canFoldAsLoad() && + foldMemoryOperand(Ops, RM.OrigMI)) { Edit->markRematerialized(RM.ParentVNI); ++NumFoldedLoads; return true; } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Original); NewLI.markNotSpillable(); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - LIS, TII, TRI); + TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); + MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewLI.reg); MO.setIsKill(); @@ -906,8 +902,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; @@ -917,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { // analyzeSiblingValues has already tested all relevant defining instructions. 
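[Editor's sketch] The rematerialization hunks above replace the old getDefIndex()/getUseIndex() calls with getRegSlot()/getRegSlot(true)/getDeadSlot(). The standalone model below is not LLVM's SlotIndex class, only a simplified illustration of how each instruction index carries sub-slots so that a use at an instruction orders before the def at the same instruction; all names here are hypothetical.

    #include <cassert>
    #include <cstdint>

    // Simplified model: each instruction gets four consecutive sub-slots so
    // reads can be ordered before writes at the same instruction. Layout and
    // names are illustrative only, not LLVM's actual implementation.
    struct SlotIdx {
      enum Slot { Block = 0, EarlyClobber = 1, Register = 2, Dead = 3 };
      uint32_t raw; // (instruction number << 2) | slot

      static SlotIdx make(uint32_t instr, Slot s) { return {instr << 2 | s}; }

      // getRegSlot(true) plays the role the old getUseIndex() did: reads sit
      // at the early-clobber slot, before same-instruction defs.
      SlotIdx getRegSlot(bool earlyClobber = false) const {
        return {(raw & ~3u) | (earlyClobber ? EarlyClobber : Register)};
      }
      SlotIdx getDeadSlot() const { return {(raw & ~3u) | Dead}; }

      bool operator<(SlotIdx o) const { return raw < o.raw; }
    };

    int main() {
      SlotIdx idx = SlotIdx::make(7, SlotIdx::Block);
      assert(idx.getRegSlot(true) < idx.getRegSlot()); // use before def
      assert(idx.getRegSlot() < idx.getDeadSlot());    // def before dead slot
    }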
- if (!Edit->anyRematerializable(LIS, TII, AA)) + if (!Edit->anyRematerializable(AA)) return; UsedValues.clear(); @@ -929,7 +925,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::use_nodbg_iterator RI = MRI.use_nodbg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) + MachineInstr *MI = RI.skipBundle();) anyRemat |= reMaterializeFor(LI, MI); } if (!anyRemat) @@ -958,7 +954,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. for (unsigned i = RegsToSpill.size(); i != 0; --i) { @@ -970,7 +966,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); if (!LI.empty()) continue; - Edit->eraseVirtReg(Reg, LIS); + Edit->eraseVirtReg(Reg); RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); @@ -1008,23 +1004,35 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } -/// foldMemoryOperand - Try folding stack slot references in Ops into MI. -/// @param MI Instruction using or defining the current register. -/// @param Ops Operand indices from readsWritesVirtualRegister(). +/// foldMemoryOperand - Try folding stack slot references in Ops into their +/// instructions. +/// +/// @param Ops Operand indices from analyzeVirtReg(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. -/// @return True on success, and MI will be erased. -bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl &Ops, - MachineInstr *LoadMI) { +/// @return True on success. +bool InlineSpiller:: +foldMemoryOperand(ArrayRef > Ops, + MachineInstr *LoadMI) { + if (Ops.empty()) + return false; + // Don't attempt folding in bundles. + MachineInstr *MI = Ops.front().first; + if (Ops.back().first != MI || MI->isBundled()) + return false; + bool WasCopy = MI->isCopy(); + unsigned ImpReg = 0; + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned Idx = Ops[i]; + unsigned Idx = Ops[i].second; MachineOperand &MO = MI->getOperand(Idx); - if (MO.isImplicit()) + if (MO.isImplicit()) { + ImpReg = MO.getReg(); continue; + } // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; @@ -1042,13 +1050,24 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, if (!FoldMI) return false; LIS.ReplaceMachineInstrInMaps(MI, FoldMI); - if (!LoadMI) - VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); - DEBUG(dbgs() << "\tfolded: " << *FoldMI); + + // TII.foldMemoryOperand may have left some implicit operands on the + // instruction. Strip them. 
+ if (ImpReg) + for (unsigned i = FoldMI->getNumOperands(); i; --i) { + MachineOperand &MO = FoldMI->getOperand(i - 1); + if (!MO.isReg() || !MO.isImplicit()) + break; + if (MO.getReg() == ImpReg) + FoldMI->RemoveOperand(i - 1); + } + + DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' + << *FoldMI); if (!WasCopy) ++NumFolded; - else if (Ops.front() == 0) + else if (Ops.front().second == 0) ++NumSpills; else ++NumReloads; @@ -1063,11 +1082,9 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, - LIS.getVNInfoAllocator()); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); ++NumReloads; } @@ -1079,10 +1096,9 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); ++NumSpills; } @@ -1093,8 +1109,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); - MachineInstr *MI = RI.skipInstruction();) { + for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg); + MachineInstr *MI = RegI.skipBundle();) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { @@ -1123,14 +1139,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { continue; // Analyze instruction. - bool Reads, Writes; - SmallVector Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + SmallVector, 8> Ops; + MIBundleOperands::RegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; @@ -1143,7 +1159,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { SnippetCopies.insert(MI); continue; } - if (Writes) { + if (RI.Writes) { // Hoist the spill of a sib-reg copy. if (hoistSpill(OldLI, MI)) { // This COPY is now dead, the value is already in the stack slot. @@ -1160,24 +1176,24 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { } // Attempt to fold memory ops. - if (foldMemoryOperand(MI, Ops)) + if (foldMemoryOperand(Ops)) continue; // Allocate interval around instruction. 
// FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Reg); NewLI.markNotSpillable(); - if (Reads) + if (RI.Reads) insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(Ops[i]); + MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); MO.setReg(NewLI.reg); if (MO.isUse()) { - if (!MI->isRegTiedToDefOperand(Ops[i])) + if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); } else { if (!MO.isDead()) @@ -1187,15 +1203,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. - if (Writes) { - if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI)); - } + if (RI.Writes) { + if (hasLiveDef) + insertSpill(NewLI, OldLI, Idx, MI); + else { + // This instruction defines a dead value. We don't need to spill it, + // but do create a live range for the dead value. + VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); + } } DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); @@ -1208,7 +1224,7 @@ void InlineSpiller::spillAll() { if (StackSlot == VirtRegMap::NO_STACK_SLOT) { StackSlot = VRM.assignVirt2StackSlot(Original); StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); - StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator()); + StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator()); } else StackInt = &LSS.getInterval(StackSlot); @@ -1228,7 +1244,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1237,7 +1253,6 @@ void InlineSpiller::spillAll() { MachineInstr *MI = RI.skipInstruction();) { assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. - VRM.RemoveMachineInstrFromMaps(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); } @@ -1245,7 +1260,7 @@ void InlineSpiller::spillAll() { // Delete all spilled registers. 
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i], LIS); + Edit->eraseVirtReg(RegsToSpill[i]); } void InlineSpiller::spill(LiveRangeEdit &edit) { @@ -1274,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, LIS, Loops); + Edit->calculateRegClassAndHint(MF, Loops); } diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index 29b47bd67ece..8368b58880a3 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -1,4 +1,4 @@ -//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,7 @@ #include "InterferenceCache.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" using namespace llvm; @@ -24,13 +25,14 @@ InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; void InterferenceCache::init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, + LiveIntervals *lis, const TargetRegisterInfo *tri) { MF = mf; LIUArray = liuarray; TRI = tri; PhysRegEntries.assign(TRI->getNumRegs(), 0); for (unsigned i = 0; i != CacheEntries; ++i) - Entries[i].clear(mf, indexes); + Entries[i].clear(mf, indexes, lis); } InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { @@ -78,7 +80,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, PhysReg = physReg; Blocks.resize(MF->getNumBlockIDs()); Aliases.clear(); - for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { + for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { LiveIntervalUnion *LIU = LIUArray + *AS; Aliases.push_back(std::make_pair(LIU, LIU->getTag())); } @@ -94,7 +96,7 @@ void InterferenceCache::Entry::reset(unsigned physReg, bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI) { unsigned i = 0, e = Aliases.size(); - for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { + for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { LiveIntervalUnion *LIU = LIUArray + *AS; if (i == e || Aliases[i].first != LIU) return false; @@ -121,6 +123,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); BlockInterference *BI = &Blocks[MBBNum]; + ArrayRef RegMaskSlots; + ArrayRef RegMaskBits; for (;;) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); @@ -137,6 +141,18 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = StartI; } + // Also check for register mask interference. + RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); + RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); + SlotIndex Limit = BI->First.isValid() ? BI->First : Stop; + for (unsigned i = 0, e = RegMaskSlots.size(); + i != e && RegMaskSlots[i] < Limit; ++i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) { + // Register mask i clobbers PhysReg before the LIU interference. + BI->First = RegMaskSlots[i]; + break; + } + PrevPos = Stop; if (BI->First.isValid()) break; @@ -166,4 +182,15 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { if (Backup) ++I; } + + // Also check for register mask interference. + SlotIndex Limit = BI->Last.isValid() ? 
BI->Last : Start; + for (unsigned i = RegMaskSlots.size(); + i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) { + // Register mask i-1 clobbers PhysReg after the LIU interference. + // Model the regmask clobber as a dead def. + BI->Last = RegMaskSlots[i-1].getDeadSlot(); + break; + } } diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index 4df0a9e5c393..485a325aa146 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -18,10 +18,11 @@ namespace llvm { +class LiveIntervals; + class InterferenceCache { const TargetRegisterInfo *TRI; LiveIntervalUnion *LIUArray; - SlotIndexes *Indexes; MachineFunction *MF; /// BlockInterference - information about the interference in a single basic @@ -52,6 +53,9 @@ class InterferenceCache { /// Indexes - Mapping block numbers to SlotIndex ranges. SlotIndexes *Indexes; + /// LIS - Used for accessing register mask interference maps. + LiveIntervals *LIS; + /// PrevPos - The previous position the iterators were moved to. SlotIndex PrevPos; @@ -71,13 +75,14 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {} - void clear(MachineFunction *mf, SlotIndexes *indexes) { + void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); PhysReg = 0; MF = mf; Indexes = indexes; + LIS = lis; } unsigned getPhysReg() const { return PhysReg; } @@ -124,10 +129,10 @@ class InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {} + InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {} /// init - Prepare cache for a new function. - void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, + void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*, const TargetRegisterInfo *); /// getMaxCursors - Return the maximum number of concurrent cursors that can diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 0f92c2d06bdd..a9ca42f69b97 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -448,11 +448,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::dbg_declare: break; // Simply strip out debugging intrinsics - case Intrinsic::eh_exception: - case Intrinsic::eh_selector: - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - case Intrinsic::eh_typeid_for: // Return something different to eh_selector. CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); diff --git a/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp new file mode 100644 index 000000000000..96a53892f6d3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/JITCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
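[Editor's sketch] The InterferenceCache changes above scan per-block lists of call sites carrying register masks (RegMaskSlots/RegMaskBits) and treat the first mask that clobbers PhysReg as an interference point. A minimal standalone version of that forward scan follows, assuming the usual convention that a set bit in the mask means the register is preserved across the call (so a clear bit is a clobber); the helper names are illustrative, not LLVM's API.

    #include <cstdint>
    #include <utility>
    #include <vector>

    using Slot = unsigned; // stand-in for a SlotIndex position

    static bool maskClobbers(const uint32_t *mask, unsigned physReg) {
      // Clear bit => the call clobbers physReg (set bit means preserved).
      return (mask[physReg / 32] & (1u << (physReg % 32))) == 0;
    }

    // Find the first regmask before `limit` that clobbers physReg, mirroring
    // the forward scan the Entry::update hunk performs per block.
    static std::pair<bool, Slot>
    firstRegMaskClobber(const std::vector<Slot> &slots,
                        const std::vector<const uint32_t *> &bits,
                        Slot limit, unsigned physReg) {
      for (size_t i = 0; i != slots.size() && slots[i] < limit; ++i)
        if (maskClobbers(bits[i], physReg))
          return {true, slots[i]};
      return {false, 0};
    }

    int main() {
      uint32_t preservesAll[2] = {~0u, ~0u};
      uint32_t clobbersReg5[2] = {~0u & ~(1u << 5), ~0u};
      std::vector<Slot> slots = {10, 20};
      std::vector<const uint32_t *> bits = {preservesAll, clobbersReg5};
      auto hit = firstRegMaskClobber(slots, bits, /*limit=*/100, /*physReg=*/5);
      return hit.first && hit.second == 20 ? 0 : 1;
    }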
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/JITCodeEmitter.h" + +using namespace llvm; + +void JITCodeEmitter::anchor() { } diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 187147a3e252..a1f479a4275f 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,96 +11,31 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/PassManager.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; -namespace llvm { - bool EnableFastISel; -} - -static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, - cl::desc("Disable Post Regalloc")); -static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, - cl::desc("Disable branch folding")); -static cl::opt DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, - cl::desc("Disable tail duplication")); -static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden, - cl::desc("Disable pre-register allocation tail duplication")); -static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, - cl::desc("Disable code placement")); -static cl::opt DisableSSC("disable-ssc", cl::Hidden, - cl::desc("Disable Stack Slot Coloring")); -static cl::opt DisableMachineDCE("disable-machine-dce", cl::Hidden, - cl::desc("Disable Machine Dead Code Elimination")); -static cl::opt DisableMachineLICM("disable-machine-licm", cl::Hidden, - cl::desc("Disable Machine LICM")); -static cl::opt DisableMachineCSE("disable-machine-cse", cl::Hidden, - cl::desc("Disable Machine Common Subexpression Elimination")); -static cl::opt DisablePostRAMachineLICM("disable-postra-machine-licm", - cl::Hidden, - cl::desc("Disable Machine LICM")); -static cl::opt DisableMachineSink("disable-machine-sink", cl::Hidden, - cl::desc("Disable Machine Sinking")); -static cl::opt DisableLSR("disable-lsr", cl::Hidden, - cl::desc("Disable Loop Strength Reduction Pass")); -static cl::opt DisableCGP("disable-cgp", cl::Hidden, - 
cl::desc("Disable Codegen Prepare")); -static cl::opt PrintLSR("print-lsr-output", cl::Hidden, - cl::desc("Print LLVM IR produced by the loop-reduce pass")); -static cl::opt PrintISelInput("print-isel-input", cl::Hidden, - cl::desc("Print LLVM IR input to isel pass")); -static cl::opt PrintGCInfo("print-gc", cl::Hidden, - cl::desc("Dump garbage collector data")); -static cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, - cl::desc("Show encoding in .s output")); -static cl::opt ShowMCInst("show-mc-inst", cl::Hidden, - cl::desc("Show instruction structure in .s output")); -static cl::opt EnableMCLogging("enable-mc-api-logging", cl::Hidden, - cl::desc("Enable MC API logging")); -static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, - cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); - -static cl::opt -AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(cl::BOU_UNSET)); - -static bool getVerboseAsm() { - switch (AsmVerbose) { - default: - case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); - case cl::BOU_TRUE: return true; - case cl::BOU_FALSE: return false; - } -} - // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -108,11 +43,31 @@ static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); +static cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, + cl::desc("Show encoding in .s output")); +static cl::opt ShowMCInst("show-mc-inst", cl::Hidden, + cl::desc("Show instruction structure in .s output")); + +static cl::opt +AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(cl::BOU_UNSET)); + +static bool getVerboseAsm() { + switch (AsmVerbose) { + case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } + llvm_unreachable("Invalid verbose asm state"); +} + LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : TargetMachine(T, Triple, CPU, FS) { - CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM); + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, Triple, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); AsmInfo = T.createMCAsmInfo(Triple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and @@ -123,16 +78,88 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +/// Turn exception handling constructs into something the code generators can +/// handle. +static void addPassesToHandleExceptions(TargetMachine *TM, + PassManagerBase &PM) { + switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. 
+ PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + case ExceptionHandling::Win64: + PM.add(createDwarfEHPass(TM)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(TM->getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + break; + } +} + +/// addPassesToX helper drives creation and initialization of TargetPassConfig. +static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, + PassManagerBase &PM, + bool DisableVerify) { + // Targets may override createPassConfig to provide a target-specific sublass. + TargetPassConfig *PassConfig = TM->createPassConfig(PM); + + // Set PassConfig options provided by TargetMachine. + PassConfig->setDisableVerify(DisableVerify); + + PM.add(PassConfig); + + PassConfig->addIRPasses(); + + addPassesToHandleExceptions(TM, PM); + + PassConfig->addISelPrepare(); + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = + new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), + &TM->getTargetLowering()->getObjFileLowering()); + PM.add(MMI); + MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*TM)); + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && + EnableFastISelOption != cl::BOU_FALSE)) + TM->setFastISel(true); + + // Ask the target for an isel. + if (PassConfig->addInstSelector()) + return NULL; + + PassConfig->addMachinePasses(); + + PassConfig->setInitialized(); + + return Context; +} + bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Context) return true; - assert(Context != 0 && "Failed to get MCContext"); if (hasMCSaveTempLabels()) Context->setAllowTemporaryLabels(false); @@ -142,10 +169,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, OwningPtr AsmStreamer; switch (FileType) { - default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, + *getInstrInfo(), + Context->getRegisterInfo(), STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; @@ -160,6 +188,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, getVerboseAsm(), hasMCUseLoc(), hasMCUseCFI(), + hasMCUseDwarfDirectory(), InstPrinter, MCE, MAB, ShowMCInst); @@ -189,9 +218,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, break; } - if (EnableMCLogging) - AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) @@ -214,14 +240,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Ctx = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Context) return true; - addCodeEmitter(PM, OptLevel, JCE); + addCodeEmitter(PM, JCE); PM.add(createGCInfoDeleter()); return false; // success! @@ -235,10 +260,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &Out, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + Ctx = addPassesToGenerateCode(this, PM, DisableVerify); + if (!Ctx) return true; if (hasMCSaveTempLabels()) @@ -247,7 +272,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. const MCSubtargetInfo &STI = getSubtarget(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, + *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); if (MCE == 0 || MAB == 0) return true; @@ -271,227 +297,3 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, return false; // success! } - -static void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); -} - -static void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - - if (VerifyMachineCode) - PM.add(createMachineVerifierPass(Banner)); -} - -/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both -/// emitting to assembly files or machine code output. -/// -bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool DisableVerify, - MCContext *&OutContext) { - // Standard LLVM-Level Passes. - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createBasicAliasAnalysisPass()); - - // Before running any passes, run the verifier to determine if the input - // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None && !DisableLSR) { - PM.add(createLoopStrengthReducePass(getTargetLowering())); - if (PrintLSR) - PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); - } - - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - - // Turn exception handling constructs into something the code generators can - // handle. 
- switch (getMCAsmInfo()->getExceptionHandlingType()) { - case ExceptionHandling::SjLj: - // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, - // catch info can get misplaced when a selector ends up more than one block - // removed from the parent invoke(s). This could happen when a landing - // pad is shared by multiple invokes and is also a target of a normal - // edge from elsewhere. - PM.add(createSjLjEHPass(getTargetLowering())); - // FALLTHROUGH - case ExceptionHandling::DwarfCFI: - case ExceptionHandling::ARM: - case ExceptionHandling::Win64: - PM.add(createDwarfEHPass(this)); - break; - case ExceptionHandling::None: - PM.add(createLowerInvokePass(getTargetLowering())); - - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - break; - } - - if (OptLevel != CodeGenOpt::None && !DisableCGP) - PM.add(createCodeGenPreparePass(getTargetLowering())); - - PM.add(createStackProtectorPass(getTargetLowering())); - - addPreISel(PM, OptLevel); - - if (PrintISelInput) - PM.add(createPrintFunctionPass("\n\n" - "*** Final LLVM Code input to ISel ***\n", - &dbgs())); - - // All passes which modify the LLVM IR are now complete; run the verifier - // to ensure that the IR is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Standard Lower-Level Passes. - - // Install a MachineModuleInfo class, which is an immutable pass that holds - // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getRegisterInfo(), - &getTargetLowering()->getObjFileLowering()); - PM.add(MMI); - OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. - - // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); - - // Enable FastISel with -fast, but allow that to be overridden. - if (EnableFastISelOption == cl::BOU_TRUE || - (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) - EnableFastISel = true; - - // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) - return true; - - // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection"); - - // Expand pseudo-instructions emitted by ISel. - PM.add(createExpandISelPseudosPass()); - - // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - - // Optimize PHIs before DCE: removing dead PHI cycles may make more - // instructions dead. - if (OptLevel != CodeGenOpt::None) - PM.add(createOptimizePHIsPass()); - - // If the target requests it, assign local variables to stack slots relative - // to one another and simplify frame index references where possible. - PM.add(createLocalStackSlotAllocationPass()); - - if (OptLevel != CodeGenOpt::None) { - // With optimization, dead code should already be eliminated. However - // there is one known exception: lowered code for arguments that are only - // used by tail calls, where the tail calls reuse the incoming stack - // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 
- if (!DisableMachineDCE) - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass"); - - if (!DisableMachineLICM) - PM.add(createMachineLICMPass()); - if (!DisableMachineCSE) - PM.add(createMachineCSEPass()); - if (!DisableMachineSink) - PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); - - PM.add(createPeepholeOptimizerPass()); - printAndVerify(PM, "After codegen peephole optimization pass"); - } - - // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PreRegAlloc passes"); - - // Perform register allocation. - PM.add(createRegisterAllocator(OptLevel)); - printAndVerify(PM, "After Register Allocation"); - - // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - if (!DisableSSC) - PM.add(createStackSlotColoringPass(false)); - - // Run post-ra machine LICM to hoist reloads / remats. - if (!DisablePostRAMachineLICM) - PM.add(createMachineLICMPass(false)); - - printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); - } - - // Run post-ra passes. - if (addPostRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PostRegAlloc passes"); - - PM.add(createExpandPostRAPseudosPass()); - printAndVerify(PM, "After ExpandPostRAPseudos"); - - // Insert prolog/epilog code. Eliminate abstract frame index references... - PM.add(createPrologEpilogCodeInserter()); - printAndVerify(PM, "After PrologEpilogCodeInserter"); - - // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) - printAndVerify(PM, "After PreSched2 passes"); - - // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRA) { - PM.add(createPostRAScheduler(OptLevel)); - printAndVerify(PM, "After PostRAScheduler"); - } - - // Branch folding must be run after regalloc and prolog/epilog insertion. - if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { - PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printNoVerify(PM, "After BranchFolding"); - } - - // Tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { - PM.add(createTailDuplicatePass(false)); - printNoVerify(PM, "After TailDuplicate"); - } - - PM.add(createGCMachineCodeAnalysisPass()); - - if (PrintGCInfo) - PM.add(createGCInfoPrinter(dbgs())); - - if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { - PM.add(createCodePlacementOptPass()); - printNoVerify(PM, "After CodePlacementOpt"); - } - - if (addPreEmitPass(PM, OptLevel)) - printNoVerify(PM, "After PreEmit passes"); - - return false; -} diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index 0eb009ddac29..deab05a412c9 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { // Finally, just to provide a stable ordering, use the node number as a // deciding factor. - return LHSNum < RHSNum; + return RHSNum < LHSNum; } @@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) { } -// ScheduledNode - As nodes are scheduled, we look to see if there are any +// scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. 
If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. -void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { AdjustPriorityOfUnscheduledPreds(I->getSUnit()); diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index a12e1a36d113..f1abcbb1dd5c 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -311,6 +311,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return Result; } +void LexicalScope::anchor() { } + /// dump - Print data structures. void LexicalScope::dump() const { #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 3dfe4c0e8cfa..2187833031ee 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -226,7 +226,7 @@ class UserValue { LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + UserValueScopes &UVS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. do { if (handleDebugValue(MBBI, Idx)) { @@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector Todo; Todo.push_back(Idx); do { @@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Is LocNo extended to reach this copy? If not, another def may be blocking // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(MI); - LocMap::iterator I = locInts.find(Idx.getUseIndex()); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; if (!LIS.hasInterval(DstReg)) continue; LiveInterval *DstLI = &LIS.getInterval(DstReg); - const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); - assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); CopyValues.push_back(std::make_pair(DstLI, DstVNI)); } @@ -620,7 +620,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -841,7 +841,7 @@ bool UserValue::splitRegister(unsigned OldReg, ArrayRef NewRegs) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can - // safely erase unuused locations. + // safely erase unused locations. 
for (unsigned i = locations.size(); i ; --i) { unsigned LocNo = i-1; const MachineOperand *Loc = &locations[LocNo]; @@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // index is no longer available. That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); - } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && - VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { // FIXME: Translate SubIdx to a stackslot offset. Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); } else { @@ -921,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, } // Don't insert anything after the first terminator, though. - return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() : - llvm::next(MachineBasicBlock::iterator(MI)); + return MI->isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); } DebugLoc UserValue::findDebugLoc() { diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index b69945aea98f..ac18843ac30d 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -381,37 +381,40 @@ void LiveInterval::join(LiveInterval &Other, for (unsigned i = 0; i != NumVals; ++i) { unsigned LHSValID = LHSValNoAssignments[i]; if (i != LHSValID || - (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) + (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) { MustMapCurValNos = true; + break; + } } // If we have to apply a mapping to our base interval assignment, rewrite it // now. if (MustMapCurValNos) { // Map the first live range. + iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - ++OutIt; - for (iterator I = OutIt, E = end(); I != E; ++I) { - OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + for (iterator I = next(OutIt), E = end(); I != E; ++I) { + VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one LiveRange. This happens when we - // have [0,3:0)[4,7:1) and map 0/1 onto the same value #. - if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) { - (OutIt-1)->end = OutIt->end; + // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. + if (OutIt->valno == nextValNo && OutIt->end == I->start) { + OutIt->end = I->end; } else { - if (I != OutIt) { + // Didn't merge. Move OutIt to the next interval, + ++OutIt; + OutIt->valno = nextValNo; + if (OutIt != I) { OutIt->start = I->start; OutIt->end = I->end; } - - // Didn't merge, on to the next one. - ++OutIt; } } - // If we merge some live ranges, chop off the end. + ++OutIt; ranges.erase(OutIt, end()); } @@ -639,8 +642,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "-phidef"; if (vni->hasPHIKill()) OS << "-phikill"; - if (vni->hasRedefByEC()) - OS << "-ec"; } } } @@ -680,15 +681,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. 
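[Editor's sketch] ConnectedVNInfoEqClasses joins value numbers that are connected to each other (through predecessor live-outs or tied redefinitions) and later distributes each connected component to its own interval. LLVM has its own IntEqClasses utility for this; the tiny union-find below is only an illustration of the join/classify idea, with hypothetical names.

    #include <numeric>
    #include <vector>

    // Tiny union-find over value-number ids. join(a, b) merges the classes of
    // a and b; classOf(v) returns a stable representative after all joins.
    class EqClasses {
      std::vector<unsigned> parent;

    public:
      explicit EqClasses(unsigned n) : parent(n) {
        std::iota(parent.begin(), parent.end(), 0u);
      }
      unsigned classOf(unsigned v) {
        while (parent[v] != v)
          v = parent[v] = parent[parent[v]]; // path halving
        return v;
      }
      void join(unsigned a, unsigned b) { parent[classOf(a)] = classOf(b); }
    };

    int main() {
      EqClasses ec(4);
      ec.join(0, 2); // e.g. a PHI connects value 0 to value 2
      ec.join(2, 3); // a tied redefinition connects value 2 to value 3
      // Values 0, 2, 3 now form one class; value 1 can be split off alone.
      return (ec.classOf(0) == ec.classOf(3) && ec.classOf(1) != ec.classOf(0))
                 ? 0
                 : 1;
    }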
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = - LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. - if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } @@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], continue; // DBG_VALUE instructions should have been eliminated earlier. SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isUse()); const VNInfo *VNI = LI.getVNInfoAt(Idx); assert(VNI && "Interval not live at use."); MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index b1e202a273d3..3ade66097cbd 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -15,31 +15,22 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "liveintervals" +#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "VirtRegMap.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include @@ -52,19 +43,14 @@ static cl::opt DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveVariables) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) -INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", 
false, false) @@ -74,18 +60,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - - if (!StrongPHIElim) { - AU.addPreservedID(PHIEliminationID); - AU.addRequiredID(PHIEliminationID); - } - - AU.addRequiredID(TwoAddressInstructionPassID); - AU.addPreserved(); - AU.addRequired(); AU.addPreserved(); AU.addRequiredTransitive(); MachineFunctionPass::getAnalysisUsage(AU); @@ -98,14 +74,12 @@ void LiveIntervals::releaseMemory() { delete I->second; r2iMap_.clear(); + RegMaskSlots.clear(); + RegMaskBits.clear(); + RegMaskBlocks.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. VNInfoAllocator.Reset(); - while (!CloneMIs.empty()) { - MachineInstr *MI = CloneMIs.back(); - CloneMIs.pop_back(); - mf_->DeleteMachineInstr(MI); - } } /// runOnMachineFunction - Register allocate the whole function @@ -120,6 +94,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { lv_ = &getAnalysis(); indexes_ = &getAnalysis(); allocatableRegs_ = tri_->getAllocatableSet(fn); + reservedRegs_ = tri_->getReservedRegs(fn); computeIntervals(); @@ -132,10 +107,21 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { /// print - Implement the dump method. void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; - for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second->print(OS, tri_); - OS << "\n"; - } + + // Dump the physregs. + for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg) + if (const LiveInterval *LI = r2iMap_.lookup(Reg)) { + LI->print(OS, tri_); + OS << '\n'; + } + + // Dump the virtregs. 
+ for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg) + if (const LiveInterval *LI = + r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) { + LI->print(OS, tri_); + OS << '\n'; + } printInstrs(OS); } @@ -149,103 +135,6 @@ void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } -bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - // We don't handle fancy stuff crossing basic block boundaries - if (li.ranges.size() != 1) - return true; - const LiveRange &range = li.ranges.front(); - SlotIndex idx = range.start.getBaseIndex(); - SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); - - // Skip deleted instructions - MachineInstr *firstMI = getInstructionFromIndex(idx); - while (!firstMI && idx != end) { - idx = idx.getNextIndex(); - firstMI = getInstructionFromIndex(idx); - } - if (!firstMI) - return false; - - // Find last instruction in range - SlotIndex lastIdx = end.getPrevIndex(); - MachineInstr *lastMI = getInstructionFromIndex(lastIdx); - while (!lastMI && lastIdx != idx) { - lastIdx = lastIdx.getPrevIndex(); - lastMI = getInstructionFromIndex(lastIdx); - } - if (!lastMI) - return false; - - // Range cannot cross basic block boundaries or terminators - MachineBasicBlock *MBB = firstMI->getParent(); - if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) - return true; - - MachineBasicBlock::const_iterator E = lastMI; - ++E; - for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { - const MachineInstr &MI = *I; - - // Allow copies to and from li.reg - if (MI.isCopy()) - if (MI.getOperand(0).getReg() == li.reg || - MI.getOperand(1).getReg() == li.reg) - continue; - - // Check for operands using reg - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand& mop = MI.getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; - } - } - - // No conflicts found. 
- return false; -} - -bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, - SmallPtrSet &JoinedCopies) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - MachineInstr *MI = getInstructionFromIndex(index); - if (!MI) - continue; // skip deleted instructions - - if (JoinedCopies.count(MI)) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || PhysReg == Reg || - TargetRegisterInfo::isVirtualRegister(PhysReg)) - continue; - if (tri_->regsOverlap(Reg, PhysReg)) - return true; - } - } - } - - return false; -} - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -271,9 +160,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, if (!MO.getSubReg() || MO.isEarlyClobber()) return false; - SlotIndex RedefIndex = MIIdx.getDefIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; @@ -296,34 +185,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getDefIndex(); - // Earlyclobbers move back one, so that they overlap the live range - // of inputs. - if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - // Make sure the first definition is not a partial redefinition. Add an - // of the full register. - // FIXME: LiveIntervals shouldn't modify the code like this. Whoever - // created the machine instruction should annotate it with flags - // as needed. Then we can simply assert here. The REG_SEQUENCE lowering - // is the main suspect. - if (MO.getSubReg()) { - mi->addRegisterDefined(interval.reg); - // Mark all defs of interval.reg on this instruction as reading . - for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO2 = mi->getOperand(i); - if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg()) - MO2.setIsUndef(); - } - } + // Make sure the first definition is not a partial redefinition. + assert(!MO.readsReg() && "First def cannot also read virtual register " + "missing flag?"); - MachineInstr *CopyMI = NULL; - if (mi->isCopyLike()) { - CopyMI = mi; - } - - VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); // Loop over all of the blocks that the vreg is defined in. There are @@ -334,9 +202,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // FIXME: what about dead vars? 
SlotIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex(); + killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); else - killIdx = defIndex.getStoreIndex(); + killIdx = defIndex.getDeadSlot(); // If the kill happens after the definition, we have an intra-block // live range. @@ -384,14 +252,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) if (PHIJoin) { assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); - ValNo = interval.getNextValue(Start, 0, VNInfoAllocator); + ValNo = interval.getNextValue(Start, VNInfoAllocator); ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); @@ -422,14 +290,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - SlotIndex RedefIndex = MIIdx.getDefIndex(); - if (MO.isEarlyClobber()) - RedefIndex = MIIdx.getUseIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getDefIndex(); + SlotIndex DefIndex = OldValNo->def.getRegSlot(); // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. @@ -440,12 +306,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); // Value#0 is now defined by the 2-addr instruction. - OldValNo->def = RedefIndex; - OldValNo->setCopy(0); - - // A re-def may be a copy. e.g. %reg1030:6 = VMOVD %reg1026, ... - if (PartReDef && mi->isCopyLike()) - OldValNo->setCopy(&*mi); + OldValNo->def = RedefIndex; // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -455,7 +316,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(), + interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); DEBUG({ @@ -467,15 +328,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live until the end of the block. We've already taken care of the // rest of the live range. 
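The hunks around here systematically replace getDefIndex()/getUseIndex()/getStoreIndex() with getRegSlot(), getRegSlot(true) and getDeadSlot(). A minimal self-contained model of the intended per-instruction slot ordering follows; the type and names are illustrative stand-ins, not the patch's SlotIndex API.

#include <cassert>

// Each instruction number owns four consecutive slots; an early-clobber def
// sorts before the normal register slot of the same instruction, and the
// dead slot sorts after both.
enum ToySlot { Block = 0, EarlyClobber = 1, Register = 2, Dead = 3 };

struct ToySlotIndex {
  unsigned Instr;   // instruction number
  ToySlot Slot;     // position within that instruction
  unsigned encode() const { return Instr * 4 + Slot; }
  ToySlotIndex getRegSlot(bool EC = false) const {
    return ToySlotIndex{Instr, EC ? EarlyClobber : Register};
  }
  ToySlotIndex getDeadSlot() const { return ToySlotIndex{Instr, Dead}; }
};

int main() {
  ToySlotIndex MIIdx{7, Block};
  // getRegSlot(true) models an early-clobber def; it precedes the plain def
  // slot, which in turn precedes the dead slot of the same instruction.
  assert(MIIdx.getRegSlot(true).encode() < MIIdx.getRegSlot().encode());
  assert(MIIdx.getRegSlot().encode() < MIIdx.getDeadSlot().encode());
  return 0;
}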
- SlotIndex defIndex = MIIdx.getDefIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(); if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + defIndex = MIIdx.getRegSlot(true); - VNInfo *ValNo; - MachineInstr *CopyMI = NULL; - if (mi->isCopyLike()) - CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); @@ -490,21 +347,26 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << '\n'); } +static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) { + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); + SI != SE; ++SI) { + const MachineBasicBlock* succ = *SI; + if (succ->isLiveIn(Reg)) + return true; + } + return false; +} + void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator mi, SlotIndex MIIdx, MachineOperand& MO, - LiveInterval &interval, - MachineInstr *CopyMI) { - // A physical register cannot be live across basic block, so its - // lifetime must end somewhere in its defining basic block. + LiveInterval &interval) { DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getDefIndex(); - // Earlyclobbers move back one. - if (MO.isEarlyClobber()) - start = MIIdx.getUseIndex(); + SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); SlotIndex end = start; // If it is not used after definition, it is considered dead at @@ -514,7 +376,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // advance below compensates. if (MO.isDead()) { DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); goto exit; } @@ -531,21 +393,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); if (DefIdx != -1) { if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber()); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that // defines it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); } goto exit; } @@ -554,12 +416,19 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, baseIndex = baseIndex.getNextIndex(); } - // The only case we should have a dead physreg here without a killing or - // instruction where we know it's dead is if it is live-in to the function - // and never used. Another possible case is the implicit use of the - // physical register has been deleted by two-address pass. - end = start.getStoreIndex(); + // If we get here the register *should* be live out. + assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!"); + // FIXME: We need saner rules for reserved regs. + if (isReserved(interval.reg)) { + end = start.getDeadSlot(); + } else { + // Unreserved, unallocable registers like EFLAGS can be live across basic + // block boundaries. 
+ assert(isRegLiveIntoSuccessor(MBB, interval.reg) && + "Unreserved reg not live-out?"); + end = getMBBEndIdx(MBB); + } exit: assert(start < end && "did not find end of interval?"); @@ -567,9 +436,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, VNInfo *ValNo = interval.getVNInfoAt(start); bool Extend = ValNo != 0; if (!Extend) - ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator); - if (Extend && MO.isEarlyClobber()) - ValNo->setHasRedefByEC(true); + ValNo = interval.getNextValue(start, VNInfoAllocator); LiveRange LR(start, end, ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR << '\n'); @@ -583,18 +450,20 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else { - MachineInstr *CopyMI = NULL; - if (MI->isCopyLike()) - CopyMI = MI; + else handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(MO.getReg()), CopyMI); - } + getOrCreateInterval(MO.getReg())); } void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex MIIdx, - LiveInterval &interval, bool isAlias) { + LiveInterval &interval) { + assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) && + "Only physical registers can be live in."); + assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() || + MBB->isLandingPad()) && + "Allocatable live-ins only valid for entry blocks and landing pads."); + DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); // Look for kills, if it reaches a def before it's killed, then it shouldn't @@ -621,16 +490,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); SeenDefUse = true; break; - } else if (mi->definesRegister(interval.reg, tri_)) { + } else if (mi->modifiesRegister(interval.reg, tri_)) { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); SeenDefUse = true; break; } @@ -644,10 +513,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Live-in register might not be used at all. if (!SeenDefUse) { - if (isAlias) { + if (isAllocatable(interval.reg) || + !isRegLiveIntoSuccessor(MBB, interval.reg)) { + // Allocatable registers are never live through. + // Non-allocatable registers that aren't live into any successors also + // aren't live through. DEBUG(dbgs() << " dead"); - end = MIIdx.getStoreIndex(); + return; } else { + // If we get here the register is non-allocatable and live into some + // successor. We'll conservatively assume it's live-through. 
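The rewritten physreg handling ends an unkilled def differently depending on whether the register is allocatable, reserved, or unreserved but unallocatable (the EFLAGS-style case). A compact restatement of that decision, using stand-in types rather than the patch's classes:

#include <cassert>
#include <vector>

struct ToyMBB {
  std::vector<const ToyMBB*> Succs;
  std::vector<unsigned> LiveIns;
  bool isLiveIn(unsigned Reg) const {
    for (unsigned R : LiveIns)
      if (R == Reg)
        return true;
    return false;
  }
};

static bool liveIntoAnySuccessor(const ToyMBB &MBB, unsigned Reg) {
  for (const ToyMBB *S : MBB.Succs)
    if (S->isLiveIn(Reg))
      return true;
  return false;
}

enum EndKind { DeadAtDef, LiveToBlockEnd };

// Mirrors the fallthrough case where no kill or redefinition was found in the
// defining block: allocatable physregs must not reach this point, reserved
// regs are clipped to a dead slot, and unreserved unallocatable regs stay
// live to the block end, provided some successor lists them as live-in.
static EndKind endOfUnkilledPhysRegDef(bool Allocatable, bool Reserved,
                                       const ToyMBB &MBB, unsigned Reg) {
  assert(!Allocatable && "allocatable physregs shouldn't be live out");
  if (Reserved)
    return DeadAtDef;
  assert(liveIntoAnySuccessor(MBB, Reg) && "unreserved reg not live-out?");
  return LiveToBlockEnd;
}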
DEBUG(dbgs() << " live through"); end = getMBBEndIdx(MBB); } @@ -656,8 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex defIdx = getMBBStartIdx(MBB); assert(getInstructionFromIndex(defIdx) == 0 && "PHI def index points at actual instruction."); - VNInfo *vni = - interval.getNextValue(defIdx, 0, VNInfoAllocator); + VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); @@ -674,10 +548,14 @@ void LiveIntervals::computeIntervals() { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); + RegMaskBlocks.resize(mf_->getNumBlockIDs()); + SmallVector UndefUses; for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; + RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); + if (MBB->empty()) continue; @@ -690,11 +568,6 @@ void LiveIntervals::computeIntervals() { for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - // Multiple live-ins can alias the same register. - for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS) - if (!hasInterval(*AS)) - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), - true); } // Skip over empty initial indices. @@ -706,10 +579,20 @@ void LiveIntervals::computeIntervals() { DEBUG(dbgs() << MIIndex << "\t" << *MI); if (MI->isDebugValue()) continue; + assert(indexes_->getInstructionFromIndex(MIIndex) == MI && + "Lost SlotIndex synchronization"); // Handle defs. for (int i = MI->getNumOperands() - 1; i >= 0; --i) { MachineOperand &MO = MI->getOperand(i); + + // Collect register masks. + if (MO.isRegMask()) { + RegMaskSlots.push_back(MIIndex.getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); + continue; + } + if (!MO.isReg() || !MO.getReg()) continue; @@ -723,6 +606,10 @@ void LiveIntervals::computeIntervals() { // Move to the next instr slot. MIIndex = indexes_->getNextNonNullIndex(MIIndex); } + + // Compute the number of register mask instructions in this block. + std::pair &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.second = RegMaskSlots.size() - RMB.first;; } // Create empty intervals for registers defined by implicit_def's (except @@ -754,7 +641,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) - && "Can't only shrink physical registers"); + && "Can only shrink virtual registers"); // Find all the values used, including PHI kills. SmallVector, 16> WorkList; @@ -766,8 +653,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); - VNInfo *VNI = li->getVNInfoAt(Idx); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + // Note: This intentionally picks up the wrong VNI in case of an EC redef. + // See below. + VNInfo *VNI = li->getVNInfoBefore(Idx); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting flags @@ -777,11 +666,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, << *li << '\n'); continue; } - if (VNI->def == Idx) { - // Special case: An early-clobber tied operand reads and writes the - // register one slot early. 
- Idx = Idx.getPrevSlot(); - VNI = li->getVNInfoAt(Idx); + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. The getVNInfoBefore call above would have + // picked up the value defined by UseMI. Adjust the kill slot and value. + if (SlotIndex::isSameInstr(VNI->def, Idx)) { + Idx = VNI->def; + VNI = li->getVNInfoBefore(Idx); assert(VNI && "Early-clobber tied value not available"); } WorkList.push_back(std::make_pair(Idx, VNI)); @@ -794,14 +684,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); - - // A use tied to an early-clobber def ends at the load slot and isn't caught - // above. Catch it here instead. This probably only ever happens for inline - // assembly. - if (VNI->def.isUse()) - if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) - WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); + NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -812,11 +695,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) { + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -827,9 +710,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. - if (VNInfo *PVNI = li->getVNInfoAt(Stop)) + if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); } continue; @@ -837,15 +720,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + SlotIndex Stop = getMBBEndIdx(*PI); + assert(li->getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); } } @@ -859,7 +743,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getNextSlot()) + if (LII->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. @@ -890,28 +774,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Register allocator hooks. 
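shrinkToUses now looks up the value reaching a point with getVNInfoBefore(Stop) at the block-end index instead of getVNInfoAt on the previous slot; with half-open segments the two queries agree. A small standalone model of the distinction, with illustrative names only:

#include <vector>

struct ValSeg { unsigned Start, End, ValNo; };   // half-open [Start, End)

// Value live at Idx: the segment that contains Idx.
static int valueAt(const std::vector<ValSeg> &Segs, unsigned Idx) {
  for (const ValSeg &S : Segs)
    if (S.Start <= Idx && Idx < S.End)
      return (int)S.ValNo;
  return -1;
}

// Value live just before Idx: the segment that starts before Idx and ends at
// or after it. This is the right query for "live out of a block", where Idx
// is the block-end index itself and no segment contains it.
static int valueBefore(const std::vector<ValSeg> &Segs, unsigned Idx) {
  for (const ValSeg &S : Segs)
    if (S.Start < Idx && Idx <= S.End)
      return (int)S.ValNo;
  return -1;
}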
// -MachineBasicBlock::iterator -LiveIntervals::getLastSplitPoint(const LiveInterval &li, - MachineBasicBlock *mbb) const { - const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor(); - - // If li is not live into a landing pad, we can insert spill code before the - // first terminator. - if (!lpad || !isLiveInToMBB(li, lpad)) - return mbb->getFirstTerminator(); - - // When there is a landing pad, spill code must go before the call instruction - // that can throw. - MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin(); - while (I != B) { - --I; - if (I->getDesc().isCall()) - return I; - } - // The block contains no calls that can throw, so use the first terminator. - return mbb->getFirstTerminator(); -} - void LiveIntervals::addKillFlags() { for (iterator I = begin(), E = end(); I != E; ++I) { unsigned Reg = I->first; @@ -924,8 +786,8 @@ void LiveIntervals::addKillFlags() { // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { - // A LOAD index indicates an MBB edge. - if (RI->end.isLoad()) + // A block index indicates an MBB edge. + if (RI->end.isBlock()) continue; MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) @@ -949,16 +811,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, if (Reg == 0 || Reg == li.reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) && - !allocatableRegs_[Reg]) + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg)) continue; - // FIXME: For now, only remat MI with at most one register operand. - assert(!RegOp && - "Can't rematerialize instruction with multiple register operand!"); RegOp = MO.getReg(); -#ifndef NDEBUG - break; -#endif + break; // Found vreg operand - leave the loop. } return RegOp; } @@ -1011,14 +867,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// isReMaterializable - Returns true if the definition MI of the specified -/// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI) { - bool Dummy2; - return isReMaterializable(li, ValNo, MI, 0, Dummy2); -} - /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool @@ -1044,672 +892,28 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// FilterFoldedOps - Filter out two-address use operands. Return -/// true if it finds any issue with the operands that ought to prevent -/// folding. -static bool FilterFoldedOps(MachineInstr *MI, - SmallVector &Ops, - unsigned &MRInfo, - SmallVector &FoldOps) { - MRInfo = 0; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned OpIdx = Ops[i]; - MachineOperand &MO = MI->getOperand(OpIdx); - // FIXME: fold subreg use. - if (MO.getSubReg()) - return true; - if (MO.isDef()) - MRInfo |= (unsigned)VirtRegMap::isMod; - else { - // Filter out two-address use operand(s). - if (MI->isRegTiedToDefOperand(OpIdx)) { - MRInfo = VirtRegMap::isModRef; - continue; - } - MRInfo |= (unsigned)VirtRegMap::isRef; - } - FoldOps.push_back(OpIdx); - } - return false; -} +MachineBasicBlock* +LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { + // A local live range must be fully contained inside the block, meaning it is + // defined and killed at instructions, not at block boundaries. It is not + // live in or or out of any block. 
+ // + // It is technically possible to have a PHI-defined live range identical to a + // single block, but we are going to return false in that case. + SlotIndex Start = LI.beginIndex(); + if (Start.isBlock()) + return NULL; -/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from -/// slot / to reg or any rematerialized load into ith operand of specified -/// MI. If it is successul, MI is updated with the newly created MI and -/// returns true. -bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, - VirtRegMap &vrm, MachineInstr *DefMI, - SlotIndex InstrIdx, - SmallVector &Ops, - bool isSS, int Slot, unsigned Reg) { - // If it is an implicit def instruction, just delete it. - if (MI->isImplicitDef()) { - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++numFolds; - return true; - } + SlotIndex Stop = LI.endIndex(); + if (Stop.isBlock()) + return NULL; - // Filter the list of operand indexes that are to be folded. Abort if - // any operand will prevent folding. - unsigned MRInfo = 0; - SmallVector FoldOps; - if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps)) - return false; - - // The only time it's safe to fold into a two address instruction is when - // it's folding reload and spill from / into a spill stack slot. - if (DefMI && (MRInfo & VirtRegMap::isMod)) - return false; - - MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot) - : tii_->foldMemoryOperand(MI, FoldOps, DefMI); - if (fmi) { - // Remember this instruction uses the spill slot. - if (isSS) vrm.addSpillSlotUse(Slot, fmi); - - // Attempt to fold the memory reference into the instruction. If - // we can do this, we don't need to insert spill code. - if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot)) - vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo); - vrm.transferSpillPts(MI, fmi); - vrm.transferRestorePts(MI, fmi); - vrm.transferEmergencySpills(MI, fmi); - ReplaceMachineInstrInMaps(MI, fmi); - MI->eraseFromParent(); - MI = fmi; - ++numFolds; - return true; - } - return false; -} - -/// canFoldMemoryOperand - Returns true if the specified load / store -/// folding is possible. -bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI, - SmallVector &Ops, - bool ReMat) const { - // Filter the list of operand indexes that are to be folded. Abort if - // any operand will prevent folding. - unsigned MRInfo = 0; - SmallVector FoldOps; - if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps)) - return false; - - // It's only legal to remat for a use, not a def. - if (ReMat && (MRInfo & VirtRegMap::isMod)) - return false; - - return tii_->canFoldMemoryOperand(MI, FoldOps); -} - -bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const { - LiveInterval::Ranges::const_iterator itr = li.ranges.begin(); - - MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end); - - if (mbb == 0) - return false; - - for (++itr; itr != li.ranges.end(); ++itr) { - MachineBasicBlock *mbb2 = - indexes_->getMBBCoveringRange(itr->start, itr->end); - - if (mbb2 != mbb) - return false; - } - - return true; -} - -/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of -/// interval on to-be re-materialized operands of MI) with new register. -void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, - MachineInstr *MI, unsigned NewVReg, - VirtRegMap &vrm) { - // There is an implicit use. 
That means one of the other operand is - // being remat'ed and the remat'ed instruction has li.reg as an - // use operand. Make sure we rewrite that as well. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (!vrm.isReMaterialized(Reg)) - continue; - MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg); - MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg); - if (UseMO) - UseMO->setReg(NewVReg); - } -} - -/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions -/// for addIntervalsForSpills to rewrite uses / defs for the given live range. -bool LiveIntervals:: -rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, - MachineInstr *MI, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector &ReMatIds, - const MachineLoopInfo *loopInfo, - unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, - DenseMap &MBBVRegsMap, - std::vector &NewLIs) { - bool CanFold = false; - RestartInstruction: - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned Reg = mop.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (Reg != li.reg) - continue; - - bool TryFold = !DefIsReMat; - bool FoldSS = true; // Default behavior unless it's a remat. - int FoldSlot = Slot; - if (DefIsReMat) { - // If this is the rematerializable definition MI itself and - // all of its uses are rematerialized, simply delete it. - if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " - << *MI << '\n'); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - break; - } - - // If def for this use can't be rematerialized, then try folding. - // If def is rematerializable and it's a load, also try folding. - TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad)); - if (isLoad) { - // Try fold loads (from stack slot, constant pool, etc.) into uses. - FoldSS = isLoadSS; - FoldSlot = LdSlot; - } - } - - // Scan all of the operands of this instruction rewriting operands - // to use NewVReg instead of li.reg as appropriate. We do this for - // two reasons: - // - // 1. If the instr reads the same spilled vreg multiple times, we - // want to reuse the NewVReg. - // 2. If the instr is a two-addr instruction, we are required to - // keep the src/dst regs pinned. - // - // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. - SmallVector Ops; - tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); - - // Create a new virtual register for the spill interval. - // Create the new register now so we can map the fold instruction - // to the new register so when it is unfolded we get the correct - // answer. - bool CreatedNewVReg = false; - if (NewVReg == 0) { - NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - CreatedNewVReg = true; - - // The new virtual register should get the same allocation hints as the - // old one. 
- std::pair Hint = mri_->getRegAllocationHint(Reg); - if (Hint.first || Hint.second) - mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); - } - - if (!TryFold) - CanFold = false; - else { - // Do not fold load / store here if we are splitting. We'll find an - // optimal point to insert a load / store later. - if (!TrySplit) { - if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, FoldSS, FoldSlot, NewVReg)) { - // Folding the load/store can completely change the instruction in - // unpredictable ways, rescan it from the beginning. - - if (FoldSS) { - // We need to give the new vreg the same stack slot as the - // spilled interval. - vrm.assignVirt2StackSlot(NewVReg, FoldSlot); - } - - HasUse = false; - HasDef = false; - CanFold = false; - if (isNotInMIMap(MI)) - break; - goto RestartInstruction; - } - } else { - // We'll try to fold it later if it's profitable. - CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat); - } - } - - mop.setReg(NewVReg); - if (mop.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - - // Reuse NewVReg for other reads. - bool HasEarlyClobber = false; - for (unsigned j = 0, e = Ops.size(); j != e; ++j) { - MachineOperand &mopj = MI->getOperand(Ops[j]); - mopj.setReg(NewVReg); - if (mopj.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - if (mopj.isEarlyClobber()) - HasEarlyClobber = true; - } - - if (CreatedNewVReg) { - if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); - if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { - // Each valnum may have its own remat id. - ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); - } else { - vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]); - } - if (!CanDelete || (HasUse && HasDef)) { - // If this is a two-addr instruction then its use operands are - // rematerializable but its def is not. It should be assigned a - // stack slot. - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - } else { - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - } else if (HasUse && HasDef && - vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) { - // If this interval hasn't been assigned a stack slot (because earlier - // def is a deleted remat def), do it now. - assert(Slot != VirtRegMap::NO_STACK_SLOT); - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI. - if (DefIsReMat && ImpUse) - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - - // Create a new register interval for this spill / remat. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (CreatedNewVReg) { - NewLIs.push_back(&nI); - MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg)); - if (TrySplit) - vrm.setIsSplitFromReg(NewVReg, li.reg); - } - - if (HasUse) { - if (CreatedNewVReg) { - LiveRange LR(index.getLoadIndex(), index.getDefIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } else { - // Extend the split live interval to this def / use. - SlotIndex End = index.getDefIndex(); - LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, - nI.getValNumInfo(nI.getNumValNums()-1)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - } - if (HasDef) { - // An early clobber starts at the use slot, except for an early clobber - // tied to a use operand (yes, that is a thing). - LiveRange LR(HasEarlyClobber && !HasUse ? 
- index.getUseIndex() : index.getDefIndex(), - index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - - DEBUG({ - dbgs() << "\t\t\t\tAdded new interval: "; - nI.print(dbgs(), tri_); - dbgs() << '\n'; - }); - } - return CanFold; -} -bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, - const VNInfo *VNI, - MachineBasicBlock *MBB, - SlotIndex Idx) const { - return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB)); -} - -/// RewriteInfo - Keep track of machine instrs that will be rewritten -/// during spilling. -namespace { - struct RewriteInfo { - SlotIndex Index; - MachineInstr *MI; - RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {} - }; - - struct RewriteInfoCompare { - bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const { - return LHS.Index < RHS.Index; - } - }; -} - -void LiveIntervals:: -rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, - LiveInterval::Ranges::const_iterator &I, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector &ReMatIds, - const MachineLoopInfo *loopInfo, - BitVector &SpillMBBs, - DenseMap > &SpillIdxes, - BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes, - DenseMap &MBBVRegsMap, - std::vector &NewLIs) { - bool AllCanFold = true; - unsigned NewVReg = 0; - SlotIndex start = I->start.getBaseIndex(); - SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - - // First collect all the def / use in this live range that will be rewritten. - // Make sure they are sorted according to instruction index. - std::vector RewriteMIs; - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineInstr *MI = &*ri; - MachineOperand &O = ri.getOperand(); - ++ri; - if (MI->isDebugValue()) { - // Modify DBG_VALUE now that the value is in a spill slot. - if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - int FI = isLoadSS ? LdSlot : (int)Slot; - if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - ReplaceMachineInstrInMaps(MI, NewDV); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - continue; - } - } - - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - continue; - } - assert(!(O.isImplicit() && O.isUse()) && - "Spilling register that's used as implicit use?"); - SlotIndex index = getInstructionIndex(MI); - if (index < start || index >= end) - continue; - - if (O.isUndef()) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024 = IMPLICIT_DEF - // 12 %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. 
Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - continue; - RewriteMIs.push_back(RewriteInfo(index, MI)); - } - std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare()); - - unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0; - // Now rewrite the defs and uses. - for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { - RewriteInfo &rwi = RewriteMIs[i]; - ++i; - SlotIndex index = rwi.Index; - MachineInstr *MI = rwi.MI; - // If MI def and/or use the same register multiple times, then there - // are multiple entries. - while (i != e && RewriteMIs[i].MI == MI) { - assert(RewriteMIs[i].Index == index); - ++i; - } - MachineBasicBlock *MBB = MI->getParent(); - - if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Prevent interval - // from being spilled. - getInterval(ImpUse).markNotSpillable(); - } - - unsigned MBBId = MBB->getNumber(); - unsigned ThisVReg = 0; - if (TrySplit) { - DenseMap::iterator NVI = MBBVRegsMap.find(MBBId); - if (NVI != MBBVRegsMap.end()) { - ThisVReg = NVI->second; - // One common case: - // x = use - // ... - // ... - // def = ... - // = use - // It's better to start a new interval to avoid artificially - // extend the new interval. - if (MI->readsWritesVirtualRegister(li.reg) == - std::make_pair(false,true)) { - MBBVRegsMap.erase(MBB->getNumber()); - ThisVReg = 0; - } - } - } - - bool IsNew = ThisVReg == 0; - if (IsNew) { - // This ends the previous live interval. If all of its def / use - // can be folded, give it a low spill weight. - if (NewVReg && TrySplit && AllCanFold) { - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; - } - AllCanFold = true; - } - NewVReg = ThisVReg; - - bool HasDef = false; - bool HasUse = false; - bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit, - index, end, MI, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg, - ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs); - if (!HasDef && !HasUse) - continue; - - AllCanFold &= CanFold; - - // Update weight of spill interval. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (!TrySplit) { - // The spill weight is now infinity as it cannot be spilled again. - nI.markNotSpillable(); - continue; - } - - // Keep track of the last def and first use in each MBB. - if (HasDef) { - if (MI != ReMatOrigDefMI || !CanDelete) { - bool HasKill = false; - if (!HasUse) - HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex()); - else { - // If this is a two-address code, then this index starts a new VNInfo. - const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex()); - if (VNI) - HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex()); - } - DenseMap >::iterator SII = - SpillIdxes.find(MBBId); - if (!HasKill) { - if (SII == SpillIdxes.end()) { - std::vector S; - S.push_back(SRInfo(index, NewVReg, true)); - SpillIdxes.insert(std::make_pair(MBBId, S)); - } else if (SII->second.back().vreg != NewVReg) { - SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if (index > SII->second.back().index) { - // If there is an earlier def and this is a two-address - // instruction, then it's not possible to fold the store (which - // would also fold the load). 
- SRInfo &Info = SII->second.back(); - Info.index = index; - Info.canFold = !HasUse; - } - SpillMBBs.set(MBBId); - } else if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) { - // There is an earlier def that's not killed (must be two-address). - // The spill is no longer needed. - SII->second.pop_back(); - if (SII->second.empty()) { - SpillIdxes.erase(MBBId); - SpillMBBs.reset(MBBId); - } - } - } - } - - if (HasUse) { - DenseMap >::iterator SII = - SpillIdxes.find(MBBId); - if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) - // Use(s) following the last def, it's not safe to fold the spill. - SII->second.back().canFold = false; - DenseMap >::iterator RII = - RestoreIdxes.find(MBBId); - if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg) - // If we are splitting live intervals, only fold if it's the first - // use and there isn't another use later in the MBB. - RII->second.back().canFold = false; - else if (IsNew) { - // Only need a reload if there isn't an earlier def / use. - if (RII == RestoreIdxes.end()) { - std::vector Infos; - Infos.push_back(SRInfo(index, NewVReg, true)); - RestoreIdxes.insert(std::make_pair(MBBId, Infos)); - } else { - RII->second.push_back(SRInfo(index, NewVReg, true)); - } - RestoreMBBs.set(MBBId); - } - } - - // Update spill weight. - unsigned loopDepth = loopInfo->getLoopDepth(MBB); - nI.weight += getSpillWeight(HasDef, HasUse, loopDepth); - } - - if (NewVReg && TrySplit && AllCanFold) { - // If all of its def / use can be folded, give it a low spill weight. - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; - } -} - -bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return false; - std::vector &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && - Restores[i].vreg == vr && - Restores[i].canFold) - return true; - return false; -} - -void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return; - std::vector &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = SlotIndex(); -} - -/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being -/// spilled and create empty intervals for their uses. -void -LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, - const TargetRegisterClass* rc, - std::vector &NewLIs) { - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineOperand &O = ri.getOperand(); - MachineInstr *MI = &*ri; - ++ri; - if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - continue; - } - if (O.isDef()) { - assert(MI->isImplicitDef() && - "Register def was not rewritten?"); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } else { - // This must be an use of an implicit_def so it's not part of the live - // interval. Create a new empty live interval for it. - // FIXME: Can we simply erase some of the instructions? e.g. Stores? 
- unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.setIsImplicitlyDefined(NewVReg); - NewLIs.push_back(&getOrCreateInterval(NewVReg)); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) { - MO.setReg(NewVReg); - MO.setIsUndef(); - } - } - } - } + // getMBBFromIndex doesn't need to search the MBB table when both indexes + // belong to proper instructions. + MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start); + MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop); + return MBB1 == MBB2 ? MBB1 : NULL; } float @@ -1730,455 +934,611 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { return (isDef + isUse) * lc; } -static void normalizeSpillWeights(std::vector &NewLIs) { - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - NewLIs[i]->weight = - normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); -} - -std::vector LiveIntervals:: -addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl *SpillIs, - const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.print(dbgs(), tri_); - dbgs() << '\n'; - }); - - // Each bit specify whether a spill is required in the MBB. - BitVector SpillMBBs(mf_->getNumBlockIDs()); - DenseMap > SpillIdxes; - BitVector RestoreMBBs(mf_->getNumBlockIDs()); - DenseMap > RestoreIdxes; - DenseMap MBBVRegsMap; - std::vector NewLIs; - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - unsigned NumValNums = li.getNumValNums(); - SmallVector ReMatDefs; - ReMatDefs.resize(NumValNums, NULL); - SmallVector ReMatOrigDefs; - ReMatOrigDefs.resize(NumValNums, NULL); - SmallVector ReMatIds; - ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT); - BitVector ReMatDelete(NumValNums); - unsigned Slot = VirtRegMap::MAX_STACK_SLOT; - - // Spilling a split live interval. It cannot be split any further. Also, - // it's also guaranteed to be a single val# / range interval. - if (vrm.getPreSplitReg(li.reg)) { - vrm.setIsSplitFromReg(li.reg, 0); - // Unset the split kill marker on the last use. - SlotIndex KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx != SlotIndex()) { - MachineInstr *KillMI = getInstructionFromIndex(KillIdx); - assert(KillMI && "Last use disappeared?"); - int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); - assert(KillOp != -1 && "Last use disappeared?"); - KillMI->getOperand(KillOp).setIsKill(false); - } - vrm.removeKillPoint(li.reg); - bool DefIsReMat = vrm.isReMaterialized(li.reg); - Slot = vrm.getStackSlot(li.reg); - assert(Slot != VirtRegMap::MAX_STACK_SLOT); - MachineInstr *ReMatDefMI = DefIsReMat ? - vrm.getReMaterializedMI(li.reg) : NULL; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad())); - bool IsFirstRange = true; - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - // If this is a split live interval with multiple ranges, it means there - // are two-address instructions that re-defined the value. Only the - // first def can be rematerialized! - if (IsFirstRange) { - // Note ReMatOrigDefMI has already been deleted. 
- rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } else { - rewriteInstructionsForSpills(li, false, I, NULL, 0, - Slot, 0, false, false, false, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } - IsFirstRange = false; - } - - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; - } - - bool TrySplit = !intervalIsInOneMBB(li); - if (TrySplit) - ++numSplits; - bool NeedStackSlot = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - bool dummy; - if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { - // Remember how to remat the def of this val#. - ReMatOrigDefs[VN] = ReMatDefMI; - // Original def may be modified so we have to make a copy here. - MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - CloneMIs.push_back(Clone); - ReMatDefs[VN] = Clone; - - bool CanDelete = true; - if (VNI->hasPHIKill()) { - // A kill is a phi node, not all of its uses can be rematerialized. - // It must not be deleted. - CanDelete = false; - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - if (CanDelete) - ReMatDelete.set(VN); - } else { - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - } - - // One stack slot per live interval. - if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { - if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) - Slot = vrm.assignVirt2StackSlot(li.reg); - - // This case only occurs when the prealloc splitter has already assigned - // a stack slot to this vreg. - else - Slot = vrm.getStackSlot(li.reg); - } - - // Create new intervals and rewrite defs and uses. - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id]; - MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id]; - bool DefIsReMat = ReMatDefMI != NULL; - bool CanDelete = ReMatDelete[I->valno->id]; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad()); - rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } - - // Insert spills / restores if we are splitting. 
- if (!TrySplit) { - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; - } - - SmallPtrSet AddedKill; - SmallVector Ops; - if (NeedStackSlot) { - int Id = SpillMBBs.find_first(); - while (Id != -1) { - std::vector &spills = SpillIdxes[Id]; - for (unsigned i = 0, e = spills.size(); i != e; ++i) { - SlotIndex index = spills[i].index; - unsigned VReg = spills[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - bool FoundUse = false; - Ops.clear(); - if (spills[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - Ops.push_back(j); - if (MO.isDef()) - continue; - if (isReMat || - (!FoundUse && !alsoFoldARestore(Id, index, VReg, - RestoreMBBs, RestoreIdxes))) { - // MI has two-address uses of the same register. If the use - // isn't the first and only use in the BB, then we can't fold - // it. FIXME: Move this to rewriteInstructionsForSpills. - CanFold = false; - break; - } - FoundUse = true; - } - } - // Fold the store into the def if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){ - Folded = true; - if (FoundUse) { - // Also folded uses, do not issue a load. - eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - } - nI.removeRange(index.getDefIndex(), index.getStoreIndex()); - } - } - - // Otherwise tell the spiller to issue a spill. - if (!Folded) { - LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; - bool isKill = LR->end == index.getStoreIndex(); - if (!MI->registerDefIsDead(nI.reg)) - // No need to spill a dead def. - vrm.addSpillPoint(VReg, isKill, MI); - if (isKill) - AddedKill.insert(&nI); - } - } - Id = SpillMBBs.find_next(Id); - } - } - - int Id = RestoreMBBs.find_first(); - while (Id != -1) { - std::vector &restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = restores.size(); i != e; ++i) { - SlotIndex index = restores[i].index; - if (index == SlotIndex()) - continue; - unsigned VReg = restores[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - Ops.clear(); - if (restores[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - if (MO.isDef()) { - // If this restore were to be folded, it would have been folded - // already. - CanFold = false; - break; - } - Ops.push_back(j); - } - } - - // Fold the load into the use if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (!isReMat) - Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); - else { - MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); - int LdSlot = 0; - bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - // If the rematerializable def is a load, also try to fold it. 
- if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) - Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, isLoadSS, LdSlot, VReg); - if (!Folded) { - unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); - if (ImpUse) { - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and mark the register - // interval as unspillable. - LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.markNotSpillable(); - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - } - } - } - } - // If folding is not possible / failed, then tell the spiller to issue a - // load / rematerialization for us. - if (Folded) - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - else - vrm.addRestorePoint(VReg, MI); - } - Id = RestoreMBBs.find_next(Id); - } - - // Finalize intervals: add kills, finalize spill weights, and filter out - // dead intervals. - std::vector RetNewLIs; - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { - LiveInterval *LI = NewLIs[i]; - if (!LI->empty()) { - if (!AddedKill.count(LI)) { - LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - SlotIndex LastUseIdx = LR->end.getBaseIndex(); - MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); - int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); - assert(UseIdx != -1); - if (!LastUse->isRegTiedToDefOperand(UseIdx)) { - LastUse->getOperand(UseIdx).setIsKill(); - vrm.addKillPoint(LI->reg, LastUseIdx); - } - } - RetNewLIs.push_back(LI); - } - } - - handleSpilledImpDefs(li, vrm, rc, RetNewLIs); - normalizeSpillWeights(RetNewLIs); - return RetNewLIs; -} - -/// hasAllocatableSuperReg - Return true if the specified physical register has -/// any super register that's allocatable. -bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) - if (allocatableRegs_[*AS] && hasInterval(*AS)) - return true; - return false; -} - -/// getRepresentativeReg - Find the largest super register of the specified -/// physical register. -unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. - unsigned BestReg = Reg; - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) { - unsigned SuperReg = *AS; - if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) { - BestReg = SuperReg; - break; - } - } - return BestReg; -} - -/// getNumConflictsWithPhysReg - Return the number of uses and defs of the -/// specified interval that conflicts with the specified physical register. -unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, - unsigned PhysReg) const { - unsigned NumConflicts = 0; - const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg)); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue()) - continue; - SlotIndex Index = getInstructionIndex(MI); - if (pli.liveAt(Index)) - ++NumConflicts; - } - return NumConflicts; -} - -/// spillPhysRegAroundRegDefsUses - Spill the specified physical register -/// around all defs and uses of the specified interval. Return true if it -/// was able to cut its interval. 
-bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, - unsigned PhysReg, VirtRegMap &vrm) { - unsigned SpillReg = getRepresentativeReg(PhysReg); - - DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) - << " represented by " << tri_->getName(SpillReg) << '\n'); - - for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) - // If there are registers which alias PhysReg, but which are not a - // sub-register of the chosen representative super register. Assert - // since we can't handle it yet. - assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) || - tri_->isSuperRegister(*AS, SpillReg)); - - bool Cut = false; - SmallVector PRegs; - if (hasInterval(SpillReg)) - PRegs.push_back(SpillReg); - for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) - if (hasInterval(*SR)) - PRegs.push_back(*SR); - - DEBUG({ - dbgs() << "Trying to spill:"; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) - dbgs() << ' ' << tri_->getName(PRegs[i]); - dbgs() << '\n'; - }); - - SmallPtrSet SeenMIs; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue() || SeenMIs.count(MI)) - continue; - SeenMIs.insert(MI); - SlotIndex Index = getInstructionIndex(MI); - bool LiveReg = false; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { - unsigned PReg = PRegs[i]; - LiveInterval &pli = getInterval(PReg); - if (!pli.liveAt(Index)) - continue; - LiveReg = true; - SlotIndex StartIdx = Index.getLoadIndex(); - SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, tm_); - } - report_fatal_error(Msg.str()); - } - pli.removeRange(StartIdx, EndIdx); - LiveReg = true; - } - if (!LiveReg) - continue; - DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); - vrm.addEmergencySpill(SpillReg, MI); - Cut = true; - } - return Cut; -} - LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), - startInst, getVNInfoAllocator()); + SlotIndex(getInstructionIndex(startInst).getRegSlot()), + getVNInfoAllocator()); VN->setHasPHIKill(true); LiveRange LR( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), + SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); Interval.addRange(LR); return LR; } + +//===----------------------------------------------------------------------===// +// Register mask functions +//===----------------------------------------------------------------------===// + +bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, + BitVector &UsableRegs) { + if (LI.empty()) + return false; + LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end(); + + // Use a smaller arrays for local live ranges. 
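The new checkRegMaskInterference walks the sorted RegMaskSlots array against the interval's segments: a lower_bound finds the first candidate slot, then slots and segments advance in lock-step. A reduced standalone version of that walk, using toy types rather than the patch's code:

#include <algorithm>
#include <vector>

struct ToySeg { unsigned Start, End; };   // half-open live segment

// Count mask positions (sorted ascending) that fall inside any live segment
// (segments sorted and non-overlapping), mirroring the two-cursor walk.
static unsigned countOverlappingMasks(const std::vector<ToySeg> &Live,
                                      const std::vector<unsigned> &Slots) {
  unsigned Overlaps = 0;
  std::vector<ToySeg>::const_iterator LiveI = Live.begin(), LiveE = Live.end();
  if (LiveI == LiveE)
    return 0;
  // Binary search for the first mask slot that could possibly overlap.
  std::vector<unsigned>::const_iterator SlotI =
      std::lower_bound(Slots.begin(), Slots.end(), LiveI->Start);
  while (SlotI != Slots.end() && LiveI != LiveE) {
    if (*SlotI < LiveI->Start)
      ++SlotI;                 // slot sits in a hole before this segment
    else if (*SlotI < LiveI->End) {
      ++Overlaps;              // slot overlaps the current segment
      ++SlotI;
    } else
      ++LiveI;                 // segment ends before this slot; advance it
  }
  return Overlaps;
}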
+ ArrayRef Slots; + ArrayRef Bits; + if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) { + Slots = getRegMaskSlotsInBlock(MBB->getNumber()); + Bits = getRegMaskBitsInBlock(MBB->getNumber()); + } else { + Slots = getRegMaskSlots(); + Bits = getRegMaskBits(); + } + + // We are going to enumerate all the register mask slots contained in LI. + // Start with a binary search of RegMaskSlots to find a starting point. + ArrayRef::iterator SlotI = + std::lower_bound(Slots.begin(), Slots.end(), LiveI->start); + ArrayRef::iterator SlotE = Slots.end(); + + // No slots in range, LI begins after the last call. + if (SlotI == SlotE) + return false; + + bool Found = false; + for (;;) { + assert(*SlotI >= LiveI->start); + // Loop over all slots overlapping this segment. + while (*SlotI < LiveI->end) { + // *SlotI overlaps LI. Collect mask bits. + if (!Found) { + // This is the first overlap. Initialize UsableRegs to all ones. + UsableRegs.clear(); + UsableRegs.resize(tri_->getNumRegs(), true); + Found = true; + } + // Remove usable registers clobbered by this mask. + UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]); + if (++SlotI == SlotE) + return Found; + } + // *SlotI is beyond the current LI segment. + LiveI = LI.advanceTo(LiveI, *SlotI); + if (LiveI == LiveE) + return Found; + // Advance SlotI until it overlaps. + while (*SlotI < LiveI->start) + if (++SlotI == SlotE) + return Found; + } +} + +//===----------------------------------------------------------------------===// +// IntervalUpdate class. +//===----------------------------------------------------------------------===// + +// HMEditor is a toolkit used by handleMove to trim or extend live intervals. +class LiveIntervals::HMEditor { +private: + LiveIntervals& LIS; + const MachineRegisterInfo& MRI; + const TargetRegisterInfo& TRI; + SlotIndex NewIdx; + + typedef std::pair IntRangePair; + typedef DenseSet RangeSet; + + struct RegRanges { + LiveRange* Use; + LiveRange* EC; + LiveRange* Dead; + LiveRange* Def; + RegRanges() : Use(0), EC(0), Dead(0), Def(0) {} + }; + typedef DenseMap BundleRanges; + +public: + HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI, + const TargetRegisterInfo& TRI, SlotIndex NewIdx) + : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {} + + // Update intervals for all operands of MI from OldIdx to NewIdx. + // This assumes that MI used to be at OldIdx, and now resides at + // NewIdx. + void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) { + assert(NewIdx != OldIdx && "No-op move? That's a bit strange."); + + // Collect the operands. + RangeSet Entering, Internal, Exiting; + bool hasRegMaskOp = false; + collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); + + // To keep the LiveRanges valid within an interval, move the ranges closest + // to the destination first. This prevents ranges from overlapping, to that + // APIs like removeRange still work. 
+ if (NewIdx < OldIdx) { + moveAllEnteringFrom(OldIdx, Entering); + moveAllInternalFrom(OldIdx, Internal); + moveAllExitingFrom(OldIdx, Exiting); + } + else { + moveAllExitingFrom(OldIdx, Exiting); + moveAllInternalFrom(OldIdx, Internal); + moveAllEnteringFrom(OldIdx, Entering); + } + + if (hasRegMaskOp) + updateRegMaskSlots(OldIdx); + +#ifndef NDEBUG + LIValidator validator; + std::for_each(Entering.begin(), Entering.end(), validator); + std::for_each(Internal.begin(), Internal.end(), validator); + std::for_each(Exiting.begin(), Exiting.end(), validator); + assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); +#endif + + } + + // Update intervals for all operands of MI to refer to BundleStart's + // SlotIndex. + void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) { + if (MI == BundleStart) + return; // Bundling instr with itself - nothing to do. + + SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI); + assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI && + "SlotIndex <-> Instruction mapping broken for MI"); + + // Collect all ranges already in the bundle. + MachineBasicBlock::instr_iterator BII(BundleStart); + RangeSet Entering, Internal, Exiting; + bool hasRegMaskOp = false; + collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) { + if (&*BII == MI) + continue; + collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + } + + BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); + + collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); + assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); + + DEBUG(dbgs() << "Entering: " << Entering.size() << "\n"); + DEBUG(dbgs() << "Internal: " << Internal.size() << "\n"); + DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n"); + + moveAllEnteringFromInto(OldIdx, Entering, BR); + moveAllInternalFromInto(OldIdx, Internal, BR); + moveAllExitingFromInto(OldIdx, Exiting, BR); + + +#ifndef NDEBUG + LIValidator validator; + std::for_each(Entering.begin(), Entering.end(), validator); + std::for_each(Internal.begin(), Internal.end(), validator); + std::for_each(Exiting.begin(), Exiting.end(), validator); + assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); +#endif + } + +private: + +#ifndef NDEBUG + class LIValidator { + private: + DenseSet Checked, Bogus; + public: + void operator()(const IntRangePair& P) { + const LiveInterval* LI = P.first; + if (Checked.count(LI)) + return; + Checked.insert(LI); + if (LI->empty()) + return; + SlotIndex LastEnd = LI->begin()->start; + for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end(); + LRI != LRE; ++LRI) { + const LiveRange& LR = *LRI; + if (LastEnd > LR.start || LR.start >= LR.end) + Bogus.insert(LI); + LastEnd = LR.end; + } + } + + bool rangesOk() const { + return Bogus.empty(); + } + }; +#endif + + // Collect IntRangePairs for all operands of MI that may need fixing. + // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes' + // maps). 
+ void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal, + RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) { + hasRegMaskOp = false; + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& MO = *MOI; + + if (MO.isRegMask()) { + hasRegMaskOp = true; + continue; + } + + if (!MO.isReg() || MO.getReg() == 0) + continue; + + unsigned Reg = MO.getReg(); + + // TODO: Currently we're skipping uses that are reserved or have no + // interval, but we're not updating their kills. This should be + // fixed. + if (!LIS.hasInterval(Reg) || + (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + continue; + + LiveInterval* LI = &LIS.getInterval(Reg); + + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + if (MO.isEarlyClobber()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true)); + assert(LR != 0 && "No EC range?"); + if (LR->end > OldIdx.getDeadSlot()) + Exiting.insert(std::make_pair(LI, LR)); + else + Internal.insert(std::make_pair(LI, LR)); + } else if (MO.isDead()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); + assert(LR != 0 && "No dead-def range?"); + Internal.insert(std::make_pair(LI, LR)); + } else { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot()); + assert(LR && LR->end > OldIdx.getDeadSlot() && + "Non-dead-def should have live range exiting."); + Exiting.insert(std::make_pair(LI, LR)); + } + } + } + } + + // Collect IntRangePairs for all operands of MI that may need fixing. + void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering, + RangeSet& Exiting, SlotIndex MIStartIdx, + SlotIndex MIEndIdx) { + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { + const MachineOperand& MO = *MOI; + assert(!MO.isRegMask() && "Can't have RegMasks in bundles."); + if (!MO.isReg() || MO.getReg() == 0) + continue; + + unsigned Reg = MO.getReg(); + + // TODO: Currently we're skipping uses that are reserved or have no + // interval, but we're not updating their kills. This should be + // fixed. 
+ if (!LIS.hasInterval(Reg) || + (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + continue; + + LiveInterval* LI = &LIS.getInterval(Reg); + + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles."); + assert(!MO.isDead() && "Dead-defs not allowed in bundles."); + LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot()); + assert(LR != 0 && "Internal ranges not allowed in bundles."); + Exiting.insert(std::make_pair(LI, LR)); + } + } + } + + BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) { + BundleRanges BR; + + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) { + LiveInterval* LI = EI->first; + LiveRange* LR = EI->second; + BR[LI->reg].Use = LR; + } + + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) { + LiveInterval* LI = II->first; + LiveRange* LR = II->second; + if (LR->end.isDead()) { + BR[LI->reg].Dead = LR; + } else { + BR[LI->reg].EC = LR; + } + } + + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) { + LiveInterval* LI = EI->first; + LiveRange* LR = EI->second; + BR[LI->reg].Def = LR; + } + + return BR; + } + + void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) { + MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx); + if (!OldKillMI->killsRegister(reg)) + return; // Bail out if we don't have kill flags on the old register. + MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); + assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); + assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill."); + OldKillMI->clearRegisterKills(reg, &TRI); + NewKillMI->addRegisterKilled(reg, &TRI); + } + + void updateRegMaskSlots(SlotIndex OldIdx) { + SmallVectorImpl::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(*RI == OldIdx && "No RegMask at OldIdx."); + *RI = NewIdx; + assert(*prior(RI) < *RI && *RI < *next(RI) && + "RegSlots out of order. Did you move one call across another?"); + } + + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) { + SlotIndex LastUse = NewIdx; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(Reg), + UE = MRI.use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; + } + return LastUse; + } + + void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + bool LiveThrough = LR->end > OldIdx.getRegSlot(); + if (LiveThrough) + return; + SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); + if (LastUse != NewIdx) + moveKillFlags(LI->reg, NewIdx, LastUse); + LR->end = LastUse.getRegSlot(); + } + + void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + // Extend the LiveRange if NewIdx is past the end. + if (NewIdx > LR->end) { + // Move kill flags if OldIdx was not originally the end + // (otherwise LR->end points to an invalid slot). 
+ if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { + assert(LR->end > OldIdx && "LiveRange does not cover original slot"); + moveKillFlags(LI->reg, LR->end, NewIdx); + } + LR->end = NewIdx.getRegSlot(); + } + } + + void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) { + bool GoingUp = NewIdx < OldIdx; + + if (GoingUp) { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringUpFrom(OldIdx, *EI); + } else { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringDownFrom(OldIdx, *EI); + } + } + + void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && + LR->end <= OldIdx.getDeadSlot() && + "Range should be internal to OldIdx."); + LiveRange Tmp(*LR); + Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber()); + Tmp.valno->def = Tmp.start; + Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot(); + LI->removeRange(*LR); + LI->addRange(Tmp); + } + + void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) { + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) + moveInternalFrom(OldIdx, *II); + } + + void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) { + LiveRange* LR = P.second; + assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() && + "Range should start in OldIdx."); + assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx."); + SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber()); + LR->start = NewStart; + LR->valno->def = NewStart; + } + + void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) { + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) + moveExitingFrom(OldIdx, *EI); + } + + void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + bool LiveThrough = LR->end > OldIdx.getRegSlot(); + if (LiveThrough) { + assert((LR->start < NewIdx || BR[LI->reg].Def == LR) && + "Def in bundle should be def range."); + assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && + "If bundle has use for this reg it should be LR."); + BR[LI->reg].Use = LR; + return; + } + + SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); + moveKillFlags(LI->reg, OldIdx, LastUse); + + if (LR->start < NewIdx) { + // Becoming a new entering range. + assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 && + "Bundle shouldn't be re-defining reg mid-range."); + assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) && + "Bundle shouldn't have different use range for same reg."); + LR->end = LastUse.getRegSlot(); + BR[LI->reg].Use = LR; + } else { + // Becoming a new Dead-def. + assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) && + "Live range starting at unexpected slot."); + assert(BR[LI->reg].Def == LR && "Reg should have def range."); + assert(BR[LI->reg].Dead == 0 && + "Can't have def and dead def of same reg in a bundle."); + LR->end = LastUse.getDeadSlot(); + BR[LI->reg].Dead = BR[LI->reg].Def; + BR[LI->reg].Def = 0; + } + } + + void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + if (NewIdx > LR->end) { + // Range extended to bundle. Add to bundle uses. + // Note: Currently adds kill flags to bundle start. 
+ assert(BR[LI->reg].Use == 0 && + "Bundle already has use range for reg."); + moveKillFlags(LI->reg, LR->end, NewIdx); + LR->end = NewIdx.getRegSlot(); + BR[LI->reg].Use = LR; + } else { + assert(BR[LI->reg].Use != 0 && + "Bundle should already have a use range for reg."); + } + } + + void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering, + BundleRanges& BR) { + bool GoingUp = NewIdx < OldIdx; + + if (GoingUp) { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringUpFromInto(OldIdx, *EI, BR); + } else { + for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); + EI != EE; ++EI) + moveEnteringDownFromInto(OldIdx, *EI, BR); + } + } + + void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + // TODO: Sane rules for moving ranges into bundles. + } + + void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal, + BundleRanges& BR) { + for (RangeSet::iterator II = Internal.begin(), IE = Internal.end(); + II != IE; ++II) + moveInternalFromInto(OldIdx, *II, BR); + } + + void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P, + BundleRanges& BR) { + LiveInterval* LI = P.first; + LiveRange* LR = P.second; + + assert(LR->start.isRegister() && + "Don't know how to merge exiting ECs into bundles yet."); + + if (LR->end > NewIdx.getDeadSlot()) { + // This range is becoming an exiting range on the bundle. + // If there was an old dead-def of this reg, delete it. + if (BR[LI->reg].Dead != 0) { + LI->removeRange(*BR[LI->reg].Dead); + BR[LI->reg].Dead = 0; + } + assert(BR[LI->reg].Def == 0 && + "Can't have two defs for the same variable exiting a bundle."); + LR->start = NewIdx.getRegSlot(); + LR->valno->def = LR->start; + BR[LI->reg].Def = LR; + } else { + // This range is becoming internal to the bundle. + assert(LR->end == NewIdx.getRegSlot() && + "Can't bundle def whose kill is before the bundle"); + if (BR[LI->reg].Dead || BR[LI->reg].Def) { + // Already have a def for this. Just delete range. + LI->removeRange(*LR); + } else { + // Make range dead, record. + LR->end = NewIdx.getDeadSlot(); + BR[LI->reg].Dead = LR; + assert(BR[LI->reg].Use == LR && + "Range becoming dead should currently be use."); + } + // In both cases the range is no longer a use on the bundle. + BR[LI->reg].Use = 0; + } + } + + void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting, + BundleRanges& BR) { + for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end(); + EI != EE; ++EI) + moveExitingFromInto(OldIdx, *EI, BR); + } + +}; + +void LiveIntervals::handleMove(MachineInstr* MI) { + SlotIndex OldIndex = indexes_->getInstructionIndex(MI); + indexes_->removeMachineInstrFromMaps(MI); + SlotIndex NewIndex = MI->isInsideBundle() ? 
+ indexes_->getInstructionIndex(MI) : + indexes_->insertMachineInstrInMaps(MI); + assert(getMBBStartIdx(MI->getParent()) <= OldIndex && + OldIndex < getMBBEndIdx(MI->getParent()) && + "Cannot handle moves across basic block boundaries."); + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); + + HMEditor HME(*this, *mri_, *tri_, NewIndex); + HME.moveAllRangesFrom(MI, OldIndex); +} + +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) { + SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart); + HMEditor HME(*this, *mri_, *tri_, NewIndex); + HME.moveAllRangesInto(MI, BundleStart); +} diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 110fe1e62024..60a68806c55e 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -21,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include + using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h index 5d64d285f39a..dbf5ac122d5d 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h @@ -20,8 +20,6 @@ #include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/LiveInterval.h" -#include - namespace llvm { class MachineLoopRange; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index a7d5af5198e5..d8ab7918ae25 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,7 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(DomTree && "Missing dominator tree"); MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot()); - assert(Kill && "No MBB at Kill"); + assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) @@ -237,7 +237,7 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc); + VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. 
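Editor's note: for readers skimming the checkRegMaskInterference code added to LiveIntervalAnalysis.cpp above, the following is a minimal, self-contained sketch of the same idea -- binary-search a sorted list of clobber points for the first one inside a live segment, then narrow a "usable registers" set by every overlapping mask. All names here (Segment, findUsableRegs, a single 32-register uint32_t mask) are invented for illustration and are not the LLVM classes themselves.

#include <algorithm>
#include <cstdint>
#include <vector>

struct Segment { unsigned start, end; };   // half-open [start, end)

// ClobberSlots is sorted and parallel to ClobberMasks (one mask per slot).
// Returns true if any clobber point overlaps the segments; on return,
// UsableRegs holds only the registers preserved by every overlapping mask.
static bool findUsableRegs(const std::vector<Segment> &Live,
                           const std::vector<unsigned> &ClobberSlots,
                           const std::vector<uint32_t> &ClobberMasks,
                           uint32_t &UsableRegs) {
  bool Found = false;
  for (const Segment &S : Live) {
    // First clobber point at or after the segment start.
    auto It = std::lower_bound(ClobberSlots.begin(), ClobberSlots.end(),
                               S.start);
    for (; It != ClobberSlots.end() && *It < S.end; ++It) {
      if (!Found) {
        UsableRegs = ~0u;                  // first overlap: start from all-ones
        Found = true;
      }
      // Keep only the bits preserved by this mask.
      UsableRegs &= ClobberMasks[It - ClobberSlots.begin()];
    }
  }
  return Found;
}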
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index b23f85165360..695f53631e1b 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -1,4 +1,4 @@ -//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===// +//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -29,13 +29,14 @@ STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, - LiveIntervals &LIS, - VirtRegMap &VRM) { - MachineRegisterInfo &MRI = VRM.getRegInfo(); +void LiveRangeEdit::Delegate::anchor() { } + +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - VRM.grow(); - VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + if (VRM) { + VRM->grow(); + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } LiveInterval &LI = LIS.getOrCreateInterval(VReg); newRegs_.push_back(&LI); return LI; @@ -43,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); scannedRemattable_ = true; - if (!tii.isTriviallyReMaterializable(DefMI, aa)) + if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; remattable_.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (LiveInterval::vni_iterator I = parent_.vni_begin(), E = parent_.vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - checkRematerializable(VNI, DefMI, tii, aa); + checkRematerializable(VNI, DefMI, aa); } scannedRemattable_ = true; } -bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { if (!scannedRemattable_) - scanRemattable(lis, tii, aa); + scanRemattable(aa); return !remattable_.empty(); } @@ -81,24 +77,18 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, /// OrigIdx are also available with the same value at UseIdx. 
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, - LiveIntervals &lis) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getRegSlot(true); + UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. - if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) continue; - // We cannot depend on virtual registers in uselessRegs_. - if (uselessRegs_) - for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui) - if ((*uselessRegs_)[ui]->reg == MO.getReg()) - return false; - LiveInterval &li = lis.getInterval(MO.getReg()); + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; @@ -110,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis) { + bool cheapAsAMove) { assert(scannedRemattable_ && "Call anyRematerializable first"); // Use scanRemattable info. @@ -121,19 +110,19 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, // No defining instruction provided. SlotIndex DefIdx; if (RM.OrigMI) - DefIdx = lis.getInstructionIndex(RM.OrigMI); + DefIdx = LIS.getInstructionIndex(RM.OrigMI); else { DefIdx = RM.ParentVNI->def; - RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); assert(RM.OrigMI && "No defining instruction for remattable value"); } // If only cheap remats were requested, bail out early. - if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) + if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) return false; return true; @@ -143,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, - LiveIntervals &lis, - const TargetInstrInfo &tii, const TargetRegisterInfo &tri, bool Late) { assert(RM.OrigMI && "Invalid remat"); - tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) - .getDefIndex(); + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getRegSlot(); } -void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, - SmallVectorImpl &Dead, - MachineRegisterInfo &MRI, - LiveIntervals &LIS, - const TargetInstrInfo &TII) { + SmallVectorImpl &Dead) { MachineInstr *DefMI = 0, *UseMI = 0; // Check that there is a single def and a single use. 
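Editor's note: the rematerialization checks touched in the hunks above (allUsesAvailableAt / canRematerializeAt) reduce to one rule -- an instruction may be re-executed at a new point only if every register it reads still carries the same value there. A simplified, standalone model of that rule follows; ValueMap, LiveValues and the integer "value ids" are invented stand-ins for LiveInterval/VNInfo, not the real API.

#include <iterator>
#include <map>
#include <vector>

// For each register, a sorted map from "index where a value starts" -> value id.
using ValueMap = std::map<unsigned, int>;

static int valueAt(const ValueMap &VM, unsigned Idx) {
  auto It = VM.upper_bound(Idx);         // first value starting after Idx
  if (It == VM.begin()) return -1;       // no value live here
  return std::prev(It)->second;          // last value starting at or before Idx
}

static bool allUsesAvailableAt(const std::vector<unsigned> &UsedRegs,
                               const std::map<unsigned, ValueMap> &LiveValues,
                               unsigned OrigIdx, unsigned UseIdx) {
  for (unsigned Reg : UsedRegs) {
    auto LI = LiveValues.find(Reg);
    if (LI == LiveValues.end())
      continue;                          // no tracked interval: assume OK
    int AtDef = valueAt(LI->second, OrigIdx);
    if (AtDef == -1)
      continue;                          // mirrors the "if (!OVNI) continue"
    if (AtDef != valueAt(LI->second, UseIdx))
      return false;                      // the input was redefined in between
  }
  return true;
}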
@@ -174,7 +158,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (MO.isDef()) { if (DefMI && DefMI != MI) return false; - if (!MI->getDesc().canFoldAsLoad()) + if (!MI->canFoldAsLoad()) return false; DefMI = MI; } else if (!MO.isUndef()) { @@ -209,19 +193,17 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, - LiveIntervals &LIS, VirtRegMap &VRM, - const TargetInstrInfo &TII) { + ArrayRef RegsBeingSpilled) { SetVector, SmallPtrSet > ToShrink; - MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. while (!Dead.empty()) { MachineInstr *MI = Dead.pop_back_val(); assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); // Never delete inline asm. if (MI->isInlineAsm()) { @@ -265,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, LI.removeValNo(VNI); if (LI.empty()) { ToShrink.remove(&LI); - eraseVirtReg(Reg, LIS); + eraseVirtReg(Reg); } } } @@ -284,12 +266,26 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + if (foldAsLoad(LI, Dead)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; + + // Don't create new intervals for a register being spilled. + // The new intervals would have to be spilled anyway so its not worth it. + // Also they currently aren't spilled so creating them and not spilling + // them results in incorrect code. + bool BeingSpilled = false; + for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { + if (LI->reg == RegsBeingSpilled[i]) { + BeingSpilled = true; + break; + } + } + + if (BeingSpilled) continue; // LI may have been separated, create new intervals. LI->RenumberValues(LIS); @@ -298,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, if (NumComp <= 1) continue; ++NumFracRanges; - bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; + bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + Dups.push_back(&createFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
if (IsOriginal) - VRM.setIsSplitFromReg(Dups.back()->reg, 0); + VRM->setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } @@ -316,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, } void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - LiveIntervals &LIS, const MachineLoopInfo &Loops) { VirtRegAuxInfo VRAI(MF, LIS, Loops); - MachineRegisterInfo &MRI = MF.getRegInfo(); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 2ca90f9f05c0..5a0d97d132dd 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -14,7 +14,7 @@ // the instruction, but are never used after the instruction (i.e., they are // killed). // -// This class computes live variables using are sparse implementation based on +// This class computes live variables using a sparse implementation based on // the machine code SSA form. This class computes live variable information for // each virtual and _register allocatable_ physical register in a function. It // uses the dominance properties of SSA form to efficiently compute live @@ -33,6 +33,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -41,6 +42,7 @@ using namespace llvm; char LiveVariables::ID = 0; +char &llvm::LiveVariablesID = LiveVariables::ID; INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", "Live Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) @@ -90,7 +92,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock *MBB, std::vector &WorkList) { unsigned BBNum = MBB->getNumber(); - + // Check to see if this basic block is one of the killing blocks. If so, // remove it. for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) @@ -98,7 +100,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry break; } - + if (MBB == DefBlock) return; // Terminate recursion if (VRInfo.AliveBlocks.test(BBNum)) @@ -107,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); + assert(MBB != &MF->front() && "Can't find reaching def for virtreg"); WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } @@ -130,7 +133,6 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, unsigned BBNum = MBB->getNumber(); VarInfo& VRInfo = getVarInfo(reg); - VRInfo.NumUses++; // Check to see if this basic block is already a kill block. 
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { @@ -190,7 +192,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (!Def) @@ -214,7 +216,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { PartDefRegs.insert(DefReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg); unsigned SubReg = *SubRegs; ++SubRegs) PartDefRegs.insert(SubReg); } @@ -245,7 +247,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); PhysRegDef[Reg] = LastPartialDef; SmallSet Processed; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (Processed.count(SubReg)) continue; @@ -257,20 +259,19 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { false/*IsDef*/, true/*IsImp*/)); PhysRegDef[SubReg] = LastPartialDef; - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) Processed.insert(*SS); } } - } - else if (LastDef && !PhysRegUse[Reg] && - !LastDef->findRegisterDefOperand(Reg)) + } else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) // Last def defines the super register, add an implicit def of reg. - LastDef->addOperand(MachineOperand::CreateReg(Reg, - true/*IsDef*/, true/*IsImp*/)); + LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); // Remember this use. PhysRegUse[Reg] = MI; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) PhysRegUse[SubReg] = MI; } @@ -286,7 +287,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; unsigned LastPartDefDist = 0; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { @@ -331,11 +332,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // Or whole register is defined, but only partly used. 
// AX = AL // = AL - // AX = + // AX = MachineInstr *LastPartDef = 0; unsigned LastPartDefDist = 0; SmallSet PartUses; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { @@ -350,7 +351,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -366,7 +367,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // EAX = op AL // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (!PartUses.count(SubReg)) continue; @@ -387,11 +388,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); PhysRegUse[SubReg] = LastRefOrPartRef; - for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg); + for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg); unsigned SSReg = *SSRegs; ++SSRegs) PhysRegUse[SSReg] = LastRefOrPartRef; } - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { @@ -419,16 +420,37 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { return true; } +void LiveVariables::HandleRegMask(const MachineOperand &MO) { + // Call HandlePhysRegKill() for all live registers clobbered by Mask. + // Clobbered registers are always dead, sp there is no need to use + // HandlePhysRegDef(). + for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) { + // Skip dead regs. + if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) + continue; + // Skip mask-preserved regs. + if (!MO.clobbersPhysReg(Reg)) + continue; + // Kill the largest clobbered super-register. + // This avoids needless implicit operands. + unsigned Super = Reg; + for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) + if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) + Super = *SR; + HandlePhysRegKill(Super, 0); + } +} + void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallVector &Defs) { // What parts of the register are previously defined? SmallSet Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { Live.insert(Reg); - for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) Live.insert(*SS); } else { - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { // If a register isn't itself defined, but all parts that make up of it // are defined, then consider it also defined. 
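Editor's note on the new HandleRegMask hook added above: call-clobber information is carried as a packed bit mask, one bit per physical register, where a set bit conventionally means the register is preserved across the call, so "clobbered" is the inverted test. A small standalone sketch of that test (the helper names are illustrative, not the MachineOperand API):

#include <cstdint>
#include <vector>

// Mask must hold at least (NumRegs + 31) / 32 words.
static bool clobbersPhysReg(const std::vector<uint32_t> &Mask, unsigned Reg) {
  return !(Mask[Reg / 32] & (1u << (Reg % 32)));   // clear bit => clobbered
}

// Walk the registers that are currently live and report the ones a call kills,
// in the spirit of HandleRegMask (register 0 means "no register", so skip it).
static std::vector<unsigned>
clobberedLiveRegs(const std::vector<uint32_t> &Mask,
                  const std::vector<bool> &LiveRegs) {
  std::vector<unsigned> Killed;
  for (unsigned Reg = 1, E = LiveRegs.size(); Reg != E; ++Reg)
    if (LiveRegs[Reg] && clobbersPhysReg(Mask, Reg))
      Killed.push_back(Reg);
  return Killed;
}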
@@ -440,7 +462,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) Live.insert(*SS); } } @@ -450,7 +472,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, // is referenced. HandlePhysRegKill(Reg, MI); // Only some of the sub-registers are used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { if (!Live.count(SubReg)) // Skip if this sub-register isn't defined. @@ -469,7 +491,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; @@ -492,6 +514,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); PHIJoins.clear(); + // FIXME: LiveIntervals will be updated to remove its dependence on + // LiveVariables to improve compilation time and eliminate bizarre pass + // dependencies. Until then, we can't change much in -O0. + if (!MRI->isSSA()) + report_fatal_error("regalloc=... not currently supported with -O0"); + analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the @@ -536,8 +564,13 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Clear kill and dead markers. LV will recompute them. SmallVector UseRegs; SmallVector DefRegs; + SmallVector RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); @@ -559,6 +592,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { HandlePhysRegUse(MOReg, MI); } + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + // Process all defs. for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { unsigned MOReg = DefRegs[i]; @@ -590,8 +627,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // them. The tail callee need not take the same registers as input // that it produces as output, and there are dependencies for its input // registers elsewhere. - if (!MBB->empty() && MBB->back().getDesc().isReturn() - && !MBB->back().getDesc().isCall()) { + if (!MBB->empty() && MBB->back().isReturn() + && !MBB->back().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator @@ -607,10 +644,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { } } + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. 
+ SmallSet LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isLandingPad()) + continue; + for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), + LE = SuccMBB->livein_end(); LI != LE; ++LI) { + unsigned LReg = *LI; + if (!TRI->isInAllocatableClass(LReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. + LiveOuts.insert(LReg); + } + } + // Loop over PhysRegDef / PhysRegUse, killing any registers that are // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) - if (PhysRegDef[i] || PhysRegUse[i]) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); @@ -754,7 +808,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, const unsigned NumNew = BB->getNumber(); // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), + for (MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 1318d6212497..238bf52dfed7 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -71,19 +71,15 @@ namespace { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { - return "Local Stack Slot Allocation"; - } private: }; } // end anonymous namespace char LocalStackSlotPass::ID = 0; - -FunctionPass *llvm::createLocalStackSlotAllocationPass() { - return new LocalStackSlotPass(); -} +char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; +INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 4c5fe4c480a6..6c8a1072697c 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -73,7 +73,8 @@ void ilist_traits::addNodeToList(MachineBasicBlock *N) { // Make sure the instructions have their operands in the reginfo lists. MachineRegisterInfo &RegInfo = MF.getRegInfo(); - for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + for (MachineBasicBlock::instr_iterator + I = N->instr_begin(), E = N->instr_end(); I != E; ++I) I->AddRegOperandsToUseLists(RegInfo); LeakDetector::removeGarbageObject(N); @@ -120,8 +121,8 @@ void ilist_traits::removeNodeFromList(MachineInstr *N) { /// lists. 
void ilist_traits:: transferNodesFromList(ilist_traits &fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { + ilist_iterator first, + ilist_iterator last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -140,33 +141,75 @@ void ilist_traits::deleteNode(MachineInstr* MI) { } MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { - iterator I = begin(); - while (I != end() && I->isPHI()) + instr_iterator I = instr_begin(), E = instr_end(); + while (I != E && I->isPHI()) ++I; + assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { - while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) + iterator E = end(); + while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue())) ++I; + // FIXME: This needs to change if we wish to bundle labels / dbg_values + // inside the bundle. + assert(!I->isInsideBundle() && + "First non-phi / non-label instruction is inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { - iterator I = end(); - while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) + iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) ; /*noop */ - while (I != end() && !I->getDesc().isTerminator()) + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getFirstTerminator() const { + const_iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { + instr_iterator B = instr_begin(), E = instr_end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) ++I; return I; } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { - iterator B = begin(), I = end(); + // Skip over end-of-block dbg_value instructions. + instr_iterator B = instr_begin(), I = instr_end(); while (I != B) { --I; - if (I->isDebugValue()) + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getLastNonDebugInstr() const { + // Skip over end-of-block dbg_value instructions. + const_instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) continue; return I; } @@ -195,6 +238,18 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } +/// Return a hopefully unique identifier for this block. 
+std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getFunction()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += (Twine("BB") + Twine(getNumber())).str(); + return Name; +} + void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { @@ -203,8 +258,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { return; } - if (Alignment) { OS << "Alignment " << Alignment << "\n"; } - if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; @@ -218,6 +271,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + if (Alignment) { + OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) + << " bytes)"; + Comma = ", "; + } + OS << '\n'; const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); @@ -237,13 +296,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; } - for (const_iterator I = begin(); I != end(); ++I) { + for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { if (Indexes) { if (Indexes->hasIndex(I)) OS << Indexes->getInstructionIndex(I); OS << '\t'; } OS << '\t'; + if (I->isInsideBundle()) + OS << " * "; I->print(OS, &getParent()->getTarget()); } @@ -260,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); - assert(I != LiveIns.end() && "Not a live in!"); - LiveIns.erase(I); + if (I != LiveIns.end()) + LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { @@ -297,8 +358,22 @@ void MachineBasicBlock::updateTerminator() { TII->RemoveBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *succ_begin(); + // its layout successor, insert a branch. First we have to locate the + // only non-landing-pad successor, as that is the fallthrough block. + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + assert(!TBB && "Found more than one non-landing-pad successor!"); + TBB = *SI; + } + + // If there is no non-landing-pad successor, the block has no + // fall-through edges to be concerned with. + if (!TBB) + return; + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) TII->InsertBranch(*this, TBB, 0, Cond, dl); } @@ -435,8 +510,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); - MI != ME && MI->isPHI(); ++MI) + for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), + ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); if (MO.getMBB() == fromMBB) @@ -473,13 +548,10 @@ bool MachineBasicBlock::canFallThrough() { if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be + // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - return empty() || !back().getDesc().isBarrier() || - back().getDesc().isPredicable(); + // Barrier is predicated and thus no longer an actual control barrier. + return empty() || !back().isBarrier() || TII->isPredicated(&back()); } // If there is no branch, control always falls through. @@ -538,14 +610,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Collect a list of virtual registers killed by the terminators. SmallVector KilledRegs; if (LV) - for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { MachineInstr *MI = I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { - if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef()) + if (!OI->isReg() || OI->getReg() == 0 || + !OI->isUse() || !OI->isKill() || OI->isUndef()) continue; unsigned Reg = OI->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg) && + if (TargetRegisterInfo::isPhysicalRegister(Reg) || LV->getVarInfo(Reg).removeKill(MI)) { KilledRegs.push_back(Reg); DEBUG(dbgs() << "Removing terminator kill: " << *MI); @@ -565,7 +639,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end(); + for (MachineBasicBlock::instr_iterator + i = Succ->instr_begin(),e = Succ->instr_end(); i != e && i->isPHI(); ++i) for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) if (i->getOperand(ni+1).getMBB() == this) @@ -577,14 +652,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addLiveIn(*I); // Update LiveVariables. + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (LV) { // Restore kills of virtual registers that were killed by the terminators. 
while (!KilledRegs.empty()) { unsigned Reg = KilledRegs.pop_back_val(); - for (iterator I = end(), E = begin(); I != E;) { - if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false)) + for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) continue; - LV->getVarInfo(Reg).Kills.push_back(I); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + LV->getVarInfo(Reg).Kills.push_back(I); DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } @@ -650,6 +727,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { return NMBB; } +MachineBasicBlock::iterator +MachineBasicBlock::erase(MachineBasicBlock::iterator I) { + if (I->isBundle()) { + MachineBasicBlock::iterator E = llvm::next(I); + return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); + } + + return Insts.erase(I.getInstrIterator()); +} + +MachineInstr *MachineBasicBlock::remove(MachineInstr *I) { + if (I->isBundle()) { + instr_iterator MII = llvm::next(I); + iterator E = end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII++; + Insts.remove(MI); + } + } + + return Insts.remove(I); +} + +void MachineBasicBlock::splice(MachineBasicBlock::iterator where, + MachineBasicBlock *Other, + MachineBasicBlock::iterator From) { + if (From->isBundle()) { + MachineBasicBlock::iterator To = llvm::next(From); + Insts.splice(where.getInstrIterator(), Other->Insts, + From.getInstrIterator(), To.getInstrIterator()); + return; + } + + Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator()); +} + /// removeFromParent - This method unlinks 'this' from the containing function, /// and returns it, but does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { @@ -673,10 +786,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); - MachineBasicBlock::iterator I = end(); - while (I != begin()) { + MachineBasicBlock::instr_iterator I = instr_end(); + while (I != instr_begin()) { --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; // Scan the operands of this machine instruction, replacing any uses of Old // with New. @@ -755,27 +868,27 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping /// any DBG_VALUE instructions. Return UnknownLoc if there is none. DebugLoc -MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { +MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; - MachineBasicBlock::iterator E = end(); - if (MBBI != E) { - // Skip debug declarations, we don't want a DebugLoc from them. - MachineBasicBlock::iterator MBBI2 = MBBI; - while (MBBI2 != E && MBBI2->isDebugValue()) - MBBI2++; - if (MBBI2 != E) - DL = MBBI2->getDebugLoc(); - } + instr_iterator E = instr_end(); + if (MBBI == E) + return DL; + + // Skip debug declarations, we don't want a DebugLoc from them. + while (MBBI != E && MBBI->isDebugValue()) + MBBI++; + if (MBBI != E) + DL = MBBI->getDebugLoc(); return DL; } /// getSuccWeight - Return weight of the edge from this block to MBB. 
/// -uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) { +uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { if (Weights.empty()) return 0; - succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); return *getWeightIterator(I); } @@ -789,6 +902,16 @@ getWeightIterator(MachineBasicBlock::succ_iterator I) { return Weights.begin() + index; } +/// getWeightIterator - Return weight iterator corresponding to the I successor +/// iterator +MachineBasicBlock::const_weight_iterator MachineBasicBlock:: +getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { + assert(Weights.size() == Successors.size() && "Async weight list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Weights.size() && "Not a current successor!"); + return Weights.begin() + index; +} + void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, bool t) { OS << "BB#" << MBB->getNumber(); diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index b92cda961474..a079d6e59139 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -56,6 +56,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { /// the other block frequencies. We do this to avoid using floating point. /// BlockFrequency MachineBlockFrequencyInfo:: -getBlockFreq(MachineBasicBlock *MBB) const { +getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp new file mode 100644 index 000000000000..22d7212007fc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -0,0 +1,1001 @@ +//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations using the CFG +// structure and branch probability estimates. +// +// The pass strives to preserve the structure of the CFG (that is, retain +// a topological ordering of basic blocks) in the absence of a *strong* signal +// to the contrary from probabilities. However, within the CFG structure, it +// attempts to choose an ordering which favors placing more likely sequences of +// blocks adjacent to each other. +// +// The algorithm works from the inner-most loop within a function outward, and +// at each stage walks through the basic blocks, trying to coalesce them into +// sequential chains where allowed by the CFG (or demanded by heavy +// probabilities). Finally, it walks the blocks in topological order, and the +// first time it reaches a chain of basic blocks, it schedules them in the +// function in-order.
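[Annotation] getSuccWeight and the new const getWeightIterator above rely on Successors and Weights being parallel lists, translating a successor iterator into a weight index with std::distance. A standalone sketch of that indexing scheme, with a hypothetical Block type and plain std::vector containers instead of MachineBasicBlock's:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <vector>

struct Block { int Id; };

struct BlockWithSuccs {
  std::vector<const Block*> Successors;
  std::vector<uint32_t> Weights;   // parallel to Successors

  // Return the weight on the edge to Succ, or 0 if weights were never set.
  uint32_t getSuccWeight(const Block *Succ) const {
    if (Weights.empty())
      return 0;
    auto I = std::find(Successors.begin(), Successors.end(), Succ);
    assert(I != Successors.end() && "Not a successor");
    // Same index into the parallel weight list, as in getWeightIterator().
    size_t Index = std::distance(Successors.begin(), I);
    assert(Index < Weights.size() && "Weight list out of sync");
    return Weights[Index];
  }
};

int main() {
  Block A{0}, B{1}, C{2};
  BlockWithSuccs Entry{{&B, &C}, {90, 10}};
  std::printf("weight(entry->C) = %u\n", Entry.getSuccWeight(&C));
  return 0;
}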
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include +using namespace llvm; + +STATISTIC(NumCondBranches, "Number of conditional branches"); +STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(CondBranchTakenFreq, + "Potential frequency of taking conditional branches"); +STATISTIC(UncondBranchTakenFreq, + "Potential frequency of taking unconditional branches"); + +namespace { +class BlockChain; +/// \brief Type for our function-wide basic block -> block chain mapping. +typedef DenseMap BlockToChainMapType; +} + +namespace { +/// \brief A chain of blocks which will be laid out contiguously. +/// +/// This is the datastructure representing a chain of consecutive blocks that +/// are profitable to layout together in order to maximize fallthrough +/// probabilities. We also can use a block chain to represent a sequence of +/// basic blocks which have some external (correctness) requirement for +/// sequential layout. +/// +/// Eventually, the block chains will form a directed graph over the function. +/// We provide an SCC-supporting-iterator in order to quicky build and walk the +/// SCCs of block chains within a function. +/// +/// The block chains also have support for calculating and caching probability +/// information related to the chain itself versus other chains. This is used +/// for ranking during the final layout of block chains. +class BlockChain { + /// \brief The sequence of blocks belonging to this chain. + /// + /// This is the sequence of blocks for a particular chain. These will be laid + /// out in-order within the function. + SmallVector Blocks; + + /// \brief A handle to the function-wide basic block to block chain mapping. + /// + /// This is retained in each block chain to simplify the computation of child + /// block chains for SCC-formation and iteration. We store the edges to child + /// basic blocks, and map them back to their associated chains using this + /// structure. + BlockToChainMapType &BlockToChain; + +public: + /// \brief Construct a new BlockChain. + /// + /// This builds a new block chain representing a single basic block in the + /// function. It also registers itself as the chain that block participates + /// in with the BlockToChain mapping. + BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + assert(BB && "Cannot create a chain with a null basic block"); + BlockToChain[BB] = this; + } + + /// \brief Iterator over blocks within the chain. + typedef SmallVectorImpl::const_iterator iterator; + + /// \brief Beginning of blocks within the chain. + iterator begin() const { return Blocks.begin(); } + + /// \brief End of blocks within the chain. 
+ iterator end() const { return Blocks.end(); } + + /// \brief Merge a block chain into this one. + /// + /// This routine merges a block chain into this one. It takes care of forming + /// a contiguous sequence of basic blocks, updating the edge list, and + /// updating the block -> chain mapping. It does not free or tear down the + /// old chain, but the old chain's block list is no longer valid. + void merge(MachineBasicBlock *BB, BlockChain *Chain) { + assert(BB); + assert(!Blocks.empty()); + + // Fast path in case we don't have a chain already. + if (!Chain) { + assert(!BlockToChain[BB]); + Blocks.push_back(BB); + BlockToChain[BB] = this; + return; + } + + assert(BB == *Chain->begin()); + assert(Chain->begin() != Chain->end()); + + // Update the incoming blocks to point to this chain, and add them to the + // chain structure. + for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); + BI != BE; ++BI) { + Blocks.push_back(*BI); + assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); + BlockToChain[*BI] = this; + } + } + +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + void dump() LLVM_ATTRIBUTE_USED { + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->dump(); + } +#endif // NDEBUG + + /// \brief Count of predecessors within the loop currently being processed. + /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; +}; +} + +namespace { +class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A typedef for a block filter set. + typedef SmallPtrSet BlockFilterSet; + + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + + /// \brief A handle to the target's instruction info. + const TargetInstrInfo *TII; + + /// \brief A handle to the target's lowering info. + const TargetLowering *TLI; + + /// \brief Allocator and owner of BlockChain structures. + /// + /// We build BlockChains lazily by merging together high probability BB + /// sequences acording to the "Algo2" in the paper mentioned at the top of + /// the file. To reduce malloc traffic, we allocate them using this slab-like + /// allocator, and destroy them after the pass completes. + SpecificBumpPtrAllocator ChainAllocator; + + /// \brief Function wide BasicBlock to BlockChain mapping. + /// + /// This mapping allows efficiently moving from any given basic block to the + /// BlockChain it participates in, if any. We use it to, among other things, + /// allow implicitly defining edges between chains as the existing edges + /// between basic blocks. 
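[Annotation] BlockChain::merge above either adopts a lone block or splices another chain's blocks onto this one, repointing the shared block -> chain map at the surviving chain as it goes. A compact standalone model of that bookkeeping, with plain structs and a std::map standing in for the pass's DenseMap:

#include <cassert>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Block { std::string Name; };
struct Chain;
using BlockToChainMap = std::map<const Block*, Chain*>;

struct Chain {
  std::vector<const Block*> Blocks;
  BlockToChainMap &BlockToChain;

  Chain(BlockToChainMap &M, const Block *BB) : Blocks(1, BB), BlockToChain(M) {
    BlockToChain[BB] = this;
  }

  // Merge: either adopt a lone block, or splice another chain's blocks onto
  // this one, updating the block -> chain mapping as we go.
  void merge(const Block *BB, Chain *Other) {
    if (!Other) {                     // fast path: block has no chain yet
      Blocks.push_back(BB);
      BlockToChain[BB] = this;
      return;
    }
    assert(BB == Other->Blocks.front() && "Must merge at the chain head");
    for (const Block *B : Other->Blocks) {
      Blocks.push_back(B);
      BlockToChain[B] = this;         // Other's block list is now stale
    }
  }
};

int main() {
  Block A{"A"}, B{"B"}, C{"C"};
  BlockToChainMap M;
  Chain CA(M, &A), CB(M, &B);
  CB.merge(&C, nullptr);   // chain: B, C
  CA.merge(&B, &CB);       // chain: A, B, C
  for (const Block *BB : CA.Blocks)
    std::printf("%s ", BB->Name.c_str());
  std::printf("\n");
  return 0;
}

As in the real routine, the donor chain's block list is left stale rather than freed; only the mapping remains authoritative afterwards.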
+ DenseMap BlockToChain; + + void markChainSuccessors(BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock *getFirstUnplacedBlock( + MachineFunction &F, + const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl &BlockWorkList, + const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + void buildLoopChains(MachineFunction &F, MachineLoop &L); + void buildCFGChains(MachineFunction &F); + void AlignLoops(MachineFunction &F); + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacement::ID = 0; +char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) + +#ifndef NDEBUG +/// \brief Helper to print the name of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockName(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber() + << " (derived from LLVM BB '" << BB->getName() << "')"; + OS.flush(); + return Result; +} + +/// \brief Helper to print the number of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockNum(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS.flush(); + return Result; +} +#endif + +/// \brief Mark a chain's successors as having one fewer preds. +/// +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. + for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); + CBI != CBE; ++CBI) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. 
No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), + SE = (*CBI)->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || *SI == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SuccChain.begin()); + } + } +} + +/// \brief Select the best successor for a block. +/// +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. +/// +/// \returns The best successor block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( + MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we manually compute probabilities using the edge + // weights. This is suboptimal as it means that the somewhat subtle + // definition of edge weight semantics is encoded here as well. We should + // improve the MBPI interface to effeciently support query patterns such as + // this. + uint32_t BestWeight = 0; + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + continue; + } + if (*SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + if (SuccChain.LoopPredecessors != 0) { + if (SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; + } + + // Make sure that a hot successor doesn't have a globally more important + // predecessor. 
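[Annotation] selectBestSuccessor above derives an edge probability from the raw successor weights (SuccWeight over the scaled SumWeight) and only lets an edge break the CFG ordering once it clears the 80% HotProb threshold. A standalone sketch of that comparison using plain integers and a hypothetical (numerator, denominator) probability type in place of LLVM's BranchProbability:

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy probability: N out of D, compared via cross-multiplication in 64 bits
// to avoid overflow, similar in spirit to BranchProbability comparisons.
struct Prob {
  uint32_t N, D;
  bool operator>=(const Prob &RHS) const {
    return uint64_t(N) * RHS.D >= uint64_t(RHS.N) * D;
  }
};

int main() {
  const Prob HotProb{4, 5};                 // 80%
  std::vector<uint32_t> SuccWeights = {5, 85, 10};

  uint64_t Sum = 0;
  for (uint32_t W : SuccWeights)
    Sum += W;

  // An edge may break the topological ordering only if it is "hot" enough.
  for (size_t i = 0; i < SuccWeights.size(); ++i) {
    Prob SuccProb{SuccWeights[i], uint32_t(Sum)};
    std::printf("succ %zu: %u/%llu %s\n", i, SuccWeights[i],
                (unsigned long long)Sum,
                SuccProb >= HotProb ? "(hot)" : "(respect CFG)");
  }
  return 0;
}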
+ BlockFrequency CandidateEdgeFreq + = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), + PE = (*SI)->pred_end(); + PI != PE; ++PI) { + if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || + BlockToChain[*PI] == &Chain) + continue; + BlockFrequency PredEdgeFreq + = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(*SI) + << " -> non-cold CFG conflict\n"); + continue; + } + } + + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") + << "\n"); + if (BestSucc && BestWeight >= SuccWeight) + continue; + BestSucc = *SI; + BestWeight = SuccWeight; + } + return BestSucc; +} + +namespace { +/// \brief Predicate struct to detect blocks already placed. +class IsBlockPlaced { + const BlockChain &PlacedChain; + const BlockToChainMapType &BlockToChain; + +public: + IsBlockPlaced(const BlockChain &PlacedChain, + const BlockToChainMapType &BlockToChain) + : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} + + bool operator()(MachineBasicBlock *BB) const { + return BlockToChain.lookup(BB) == &PlacedChain; + } +}; +} + +/// \brief Select the best block from a worklist. +/// +/// This looks through the provided worklist as a list of candidate basic +/// blocks and select the most profitable one to place. The definition of +/// profitable only really makes sense in the context of a loop. This returns +/// the most frequently visited block in the worklist, which in the case of +/// a loop, is the one most desirable to be physically close to the rest of the +/// loop body in order to improve icache behavior. +/// +/// \returns The best block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl &WorkList, + const BlockFilterSet *BlockFilter) { + // Once we need to walk the worklist looking for a candidate, cleanup the + // worklist of already placed entries. + // FIXME: If this shows up on profiles, it could be folded (at the cost of + // some code complexity) into the loop below. + WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), + IsBlockPlaced(Chain, BlockToChain)), + WorkList.end()); + + MachineBasicBlock *BestBlock = 0; + BlockFrequency BestFreq; + for (SmallVectorImpl::iterator WBI = WorkList.begin(), + WBE = WorkList.end(); + WBI != WBE; ++WBI) { + BlockChain &SuccChain = *BlockToChain[*WBI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*WBI) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq + << " (freq)\n"); + if (BestBlock && BestFreq >= CandidateFreq) + continue; + BestBlock = *WBI; + BestFreq = CandidateFreq; + } + return BestBlock; +} + +/// \brief Retrieve the first unplaced basic block. +/// +/// This routine is called when we are unable to use the CFG to walk through +/// all of the basic blocks and form a chain due to unnatural loops in the CFG. +/// We walk through the function's blocks in order, starting from the +/// LastUnplacedBlockIt. 
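[Annotation] selectBestCandidateBlock above first prunes already-placed blocks from the worklist with the erase/remove_if idiom and then keeps the remaining block with the highest frequency. A standalone sketch of the same pattern with hypothetical block ids and integer frequencies in place of BlockFrequency:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Block id -> estimated execution frequency (stand-in for BlockFrequency).
  std::map<int, uint64_t> Freq = {{1, 50}, {2, 400}, {3, 120}};
  std::vector<int> WorkList = {1, 2, 3};
  std::vector<int> Placed = {2};   // blocks already merged into the chain

  // Prune entries that are already placed (erase/remove_if idiom).
  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
                                [&](int BB) {
                                  return std::find(Placed.begin(), Placed.end(),
                                                   BB) != Placed.end();
                                }),
                 WorkList.end());

  // Pick the most frequently executed remaining block.
  int Best = -1;
  uint64_t BestFreq = 0;
  for (int BB : WorkList)
    if (Best == -1 || Freq[BB] > BestFreq) {
      Best = BB;
      BestFreq = Freq[BB];
    }
  std::printf("best candidate: BB#%d (freq %llu)\n", Best,
              (unsigned long long)BestFreq);
  return 0;
}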
We update this iterator on each call to avoid +/// re-scanning the entire sequence on repeated calls to this routine. +MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + ++I) { + if (BlockFilter && !BlockFilter->count(I)) + continue; + if (BlockToChain[I] != &PlacedChain) { + PrevUnplacedBlockIt = I; + // Now select the head of the chain to which the unplaced block belongs + // as the block to place. This will force the entire chain to be placed, + // and satisfies the requirements of merging chains. + return *BlockToChain[I]->begin(); + } + } + return 0; +} + +void MachineBlockPlacement::buildChain( + MachineBasicBlock *BB, + BlockChain &Chain, + SmallVectorImpl &BlockWorkList, + const BlockFilterSet *BlockFilter) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + MachineFunction &F = *BB->getParent(); + MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); + + MachineBasicBlock *LoopHeaderBB = BB; + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + BB = *llvm::prior(Chain.end()); + for (;;) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*llvm::prior(Chain.end()) == BB); + MachineBasicBlock *BestSucc = 0; + + // Look for the best viable successor if there is one to place immediately + // after this block. + BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + + // If an immediate successor isn't available, look for the best viable + // block among those we've identified as not violating the loop's CFG at + // this point. This won't be a fallthrough, but it will increase locality. + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + + if (!BestSucc) { + BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, + BlockFilter); + if (!BestSucc) + break; + + DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); + } + + // Place this block, updating the datastructures to reflect its placement. + BlockChain &SuccChain = *BlockToChain[BestSucc]; + // Zero out LoopPredecessors for the successor we're about to merge in case + // we selected a successor that didn't fit naturally into the CFG. + SuccChain.LoopPredecessors = 0; + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) + << " to " << getBlockNum(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + Chain.merge(BestSucc, &SuccChain); + BB = *llvm::prior(Chain.end()); + } + + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); +} + +/// \brief Find the best loop top block for layout. +/// +/// This routine implements the logic to analyze the loop looking for the best +/// block to layout at the top of the loop. Typically this is done to maximize +/// fallthrough opportunities. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. 
However, if the loop + // header has be pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + BlockFrequency BestExitEdgeFreq; + MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *LoopingBB = 0; + // If there are exits to outer loops, loop rotation can severely limit + // fallthrough opportunites unless it selects such an exit. Keep a set of + // blocks where rotating to exit with that block will reach an outer loop. + SmallPtrSet BlocksExitingToOuterLoop; + + DEBUG(dbgs() << "Finding best loop exit for: " + << getBlockName(L.getHeader()) << "\n"); + for (MachineLoop::block_iterator I = L.block_begin(), + E = L.block_end(); + I != E; ++I) { + BlockChain &Chain = *BlockToChain[*I]; + // Ensure that this block is at the end of a chain; otherwise it could be + // mid-way through an inner loop or a successor of an analyzable branch. + if (*I != *llvm::prior(Chain.end())) + continue; + + // Now walk the successors. We need to establish whether this has a viable + // exiting successor and whether it has a viable non-exiting successor. + // We store the old exiting state and restore it if a viable looping + // successor isn't found. + MachineBasicBlock *OldExitingBB = ExitingBB; + BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; + // We also compute and store the best looping successor for use in layout. + MachineBasicBlock *BestLoopSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we use the internal weights. This is only valid + // because it is purely a ranking function, we don't care about anything + // but the relative values. + uint32_t BestLoopSuccWeight = 0; + // FIXME: We also manually compute the probabilities to avoid quadratic + // behavior. + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); + for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), + SE = (*I)->succ_end(); + SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + if (*SI == *I) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Don't split chains, either this chain or the successor's chain. + if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: " + : "exiting: ") + << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (chain conflict)\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); + if (LoopBlockSet.count(*SI)) { + DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) + continue; + + BestLoopSucc = *SI; + BestLoopSuccWeight = SuccWeight; + continue; + } + + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + // Note that we slightly bias this toward an existing layout successor to + // retain incoming order in the absence of better information. 
+ // FIXME: Should we bias this more strongly? It's pretty weak. + if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + ((*I)->isLayoutSuccessor(*SI) && + !(ExitEdgeFreq < BestExitEdgeFreq))) { + BestExitEdgeFreq = ExitEdgeFreq; + ExitingBB = *I; + } + + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + + // Restore the old exiting state, no viable looping successor was found. + if (!BestLoopSucc) { + ExitingBB = OldExitingBB; + BestExitEdgeFreq = OldBestExitEdgeFreq; + continue; + } + + // If this was best exiting block thus far, also record the looping block. + if (ExitingBB == *I) + LoopingBB = BestLoopSucc; + } + // Without a candidate exitting block or with only a single block in the + // loop, just use the loop header to layout the loop. + if (!ExitingBB || L.getNumBlocks() == 1) + return L.getHeader(); + + // Also, if we have exit blocks which lead to outer loops but didn't select + // one of them as the exiting block we are rotating toward, disable loop + // rotation altogether. + if (!BlocksExitingToOuterLoop.empty() && + !BlocksExitingToOuterLoop.count(ExitingBB)) + return L.getHeader(); + + assert(LoopingBB && "All successors of a loop block are exit blocks!"); + DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); + return LoopingBB; +} + +/// \brief Forms basic block chains from the natural loop structures. +/// +/// These chains are designed to preserve the existing *structure* of the code +/// as much as possible. We can then stitch the chains together in a way which +/// both preserves the topological structure and minimizes taken conditional +/// branches. +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { + // First recurse through any nested loops, building chains for those inner + // loops. + for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + buildLoopChains(F, **LI); + + SmallVector BlockWorkList; + BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + + MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); + BlockChain &LoopChain = *BlockToChain[LayoutTop]; + + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. + SmallPtrSet UpdatedPreds; + assert(LoopChain.LoopPredecessors == 0); + UpdatedPreds.insert(&LoopChain); + for (MachineLoop::block_iterator BI = L.block_begin(), + BE = L.block_end(); + BI != BE; ++BI) { + BlockChain &Chain = *BlockToChain[*BI]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); + + DEBUG({ + // Crash at the end so we get all of the debugging output first. 
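[Annotation] findBestLoopTop above rotates the loop so that the block carrying the most frequent exit edge ends up last, maximizing fallthrough out of the loop. A much-reduced standalone sketch of just the "pick the exit edge with the highest frequency" step, with made-up block names and frequencies:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct ExitEdge {
  std::string FromBlock;   // loop block with an exiting successor
  uint64_t EdgeFreq;       // freq(FromBlock) * prob(FromBlock -> exit)
};

int main() {
  std::vector<ExitEdge> Exits = {
      {"latch", 900}, {"early.exit", 40}, {"guard", 300}};

  // Choose the exiting block carrying the most frequent exit edge; the loop
  // is then rotated so this block can fall through to the code after it.
  const ExitEdge *Best = nullptr;
  for (const ExitEdge &E : Exits)
    if (!Best || E.EdgeFreq > Best->EdgeFreq)
      Best = &E;

  std::printf("best exiting block: %s (freq %llu)\n", Best->FromBlock.c_str(),
              (unsigned long long)Best->EdgeFreq);
  return 0;
}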
+ bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } + for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); + BCI != BCE; ++BCI) + if (!LoopBlockSet.erase(*BCI)) { + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block or vice versa. + dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!LoopBlockSet.empty()) { + BadLoop = true; + for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), + LBE = LoopBlockSet.end(); + LBI != LBE; ++LBI) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*LBI) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); + }); +} + +void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + SmallVector Cond; // For AnalyzeBranch. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = FI; + BlockChain *Chain + = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + // Also, merge any blocks which we cannot reason about and must preserve + // the exact fallthrough behavior for. + for (;;) { + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + break; + + MachineFunction::iterator NextFI(llvm::next(FI)); + MachineBasicBlock *NextBB = NextFI; + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextFI != FE && "Can't fallthrough past the last block."); + DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); + Chain->merge(NextBB, 0); + FI = NextFI; + BB = NextBB; + } + } + + // Build any loop-based chains. 
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; + ++LI) + buildLoopChains(F, **LI); + + SmallVector BlockWorkList; + + SmallPtrSet UpdatedPreds; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + BlockChain &Chain = *BlockToChain[BB]; + if (!UpdatedPreds.insert(&Chain)) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, BlockWorkList); + + typedef SmallPtrSet FunctionBlockSetType; + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadFunc = false; + FunctionBlockSetType FunctionBlockSet; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + FunctionBlockSet.insert(FI); + + for (BlockChain::iterator BCI = FunctionChain.begin(), + BCE = FunctionChain.end(); + BCI != BCE; ++BCI) + if (!FunctionBlockSet.erase(*BCI)) { + BadFunc = true; + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + } + + if (!FunctionBlockSet.empty()) { + BadFunc = true; + for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); + FBI != FBE; ++FBI) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(*FBI) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; + + // Update the terminator of the previous block. + if (BI == FunctionChain.begin()) + continue; + MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + + // FIXME: It would be awesome of updateTerminator would just return rather + // than assert when the branch cannot be analyzed in order to remove this + // boiler plate. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + PrevBB->updateTerminator(); + } + + // Fixup the last block. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) + F.back().updateTerminator(); +} + +/// \brief Recursive helper to align a loop and any nested loops. +static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { + // Recurse through nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + AlignLoop(F, *I, Align); + + L->getTopBlock()->setAlignment(Align); +} + +/// \brief Align loop headers to target preferred alignments. 
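[Annotation] The splice loop above walks the function chain in layout order and moves each block in front of InsertPos, advancing the insertion point only when a block is already in place. A standalone model of that reordering using std::list::splice on hypothetical block names:

#include <algorithm>
#include <cstdio>
#include <list>
#include <string>
#include <vector>

int main() {
  // Current layout of the "function" and the desired chain order.
  std::list<std::string> Function = {"entry", "else", "then", "ret"};
  std::vector<std::string> ChainOrder = {"entry", "then", "else", "ret"};

  auto InsertPos = Function.begin();
  for (const std::string &BB : ChainOrder) {
    auto It = std::find(Function.begin(), Function.end(), BB);
    if (It != InsertPos)
      Function.splice(InsertPos, Function, It);   // move BB before InsertPos
    else
      ++InsertPos;                                // already in place
  }

  for (const std::string &BB : Function)
    std::printf("%s\n", BB.c_str());
  return 0;
}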
+void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return; + + unsigned Align = TLI->getPrefLoopAlignment(); + if (!Align) + return; // Don't care about loop alignment. + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + AlignLoop(F, *I, Align); +} + +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis(); + MBFI = &getAnalysis(); + MLI = &getAnalysis(); + TII = F.getTarget().getInstrInfo(); + TLI = F.getTarget().getTargetLowering(); + assert(BlockToChain.empty()); + + buildCFGChains(F); + AlignLoops(F); + + BlockToChain.clear(); + ChainAllocator.DestroyAll(); + + // We always return true as we have no way to track whether the final order + // differs from the original order. + return true; +} + +namespace { +/// \brief A pass to compute block placement statistics. +/// +/// A separate pass to compute interesting statistics for evaluating block +/// placement. This is separate from the actual placement pass so that they can +/// be computed in the absense of any placement transformations or when using +/// alternative placement strategies. +class MachineBlockPlacementStats : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacementStats::ID = 0; +char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) + +bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis(); + MBFI = &getAnalysis(); + + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(I); + Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches + : NumUncondBranches; + Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq + : UncondBranchTakenFreq; + for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); + SI != SE; ++SI) { + // Skip if this successor is a fallthrough. 
+ if (I->isLayoutSuccessor(*SI)) + continue; + + BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + ++NumBranches; + BranchTakenFreq += EdgeFreq.getFrequency(); + } + } + + return false; +} + diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index c13fa6bc5333..0cc1af07952d 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -26,26 +26,43 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", char MachineBranchProbabilityInfo::ID = 0; -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; +void MachineBranchProbabilityInfo::anchor() { } +uint32_t MachineBranchProbabilityInfo:: +getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { + // First we compute the sum with 64-bits of precision, ensuring that cannot + // overflow by bounding the number of weights considered. Hopefully no one + // actually needs 2^32 successors. + assert(MBB->succ_size() < UINT32_MAX); + uint64_t Sum = 0; + Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - + uint32_t Weight = getEdgeWeight(MBB, *I); Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; } + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return Sum; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. + assert((Sum / UINT32_MAX) < UINT32_MAX); + Scale = (Sum / UINT32_MAX) + 1; + Sum = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, *I); + Sum += Weight / Scale; + } + assert(Sum <= UINT32_MAX); return Sum; } uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) const { +MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; @@ -55,37 +72,24 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. 
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; uint32_t MaxWeight = 0; MachineBasicBlock *MaxSucc = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - + uint32_t Weight = getEdgeWeight(MBB, *I); if (Weight > MaxWeight) { MaxWeight = Weight; - MaxSucc = Succ; + MaxSucc = *I; } } - // FIXME: Use BranchProbability::compare. - if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; return 0; @@ -94,8 +98,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); + uint32_t Scale = 1; + uint32_t D = getSumForBlock(Src, Scale); + uint32_t N = getEdgeWeight(Src, Dst) / Scale; return BranchProbability(N, D); } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 7eda8c129dc4..a63688e9ec62 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -26,13 +26,14 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" - using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { @@ -49,7 +50,7 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); @@ -62,6 +63,8 @@ namespace { virtual void releaseMemory() { ScopeMap.clear(); Exps.clear(); + AllocatableRegs.clear(); + ReservedRegs.clear(); } private: @@ -75,6 +78,8 @@ namespace { ScopedHTType VNT; SmallVector Exps; unsigned CurrVN; + BitVector AllocatableRegs; + BitVector ReservedRegs; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, @@ -82,9 +87,12 @@ namespace { MachineBasicBlock::const_iterator E) const ; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet &PhysRefs) const; + SmallSet &PhysRefs, + SmallVector &PhysDefs) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet &PhysRefs) const; + SmallSet &PhysRefs, + SmallVector &PhysDefs, + bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -99,6 +107,7 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -106,8 +115,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, 
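[Annotation] The reworked getSumForBlock above sums the successor weights in 64 bits and, when the total would overflow 32 bits, derives a Scale that every weight is divided by before re-summing; getEdgeProbability then divides the edge weight by the same scale. A standalone sketch of that scheme with hypothetical weights, outside the MBPI interface:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Sum 32-bit weights without overflowing 32 bits: if the 64-bit total is too
// large, compute a scale, divide each weight by it, and re-sum.
static uint32_t sumForBlock(const std::vector<uint32_t> &Weights,
                            uint32_t &Scale) {
  uint64_t Sum = 0;
  Scale = 1;
  for (uint32_t W : Weights)
    Sum += W;
  if (Sum <= UINT32_MAX)
    return uint32_t(Sum);

  Scale = uint32_t(Sum / UINT32_MAX) + 1;
  Sum = 0;
  for (uint32_t W : Weights)
    Sum += W / Scale;
  assert(Sum <= UINT32_MAX);
  return uint32_t(Sum);
}

int main() {
  std::vector<uint32_t> Weights = {3000000000u, 2500000000u, 10u};
  uint32_t Scale = 1;
  uint32_t Sum = sumForBlock(Weights, Scale);
  // Probability of the first edge, as numerator/denominator after scaling.
  std::printf("scale=%u sum=%u first-edge N=%u\n", Scale, Sum,
              Weights[0] / Scale);
  return 0;
}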
"machine-cse", "Machine Common Subexpression Elimination", false, false) -FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } - bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { bool Changed = false; @@ -163,6 +170,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool SeenDef = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; if (!MO.isReg() || !MO.getReg()) continue; if (!TRI->regsOverlap(MO.getReg(), Reg)) @@ -173,7 +182,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, SeenDef = true; } if (SeenDef) - // See a def of Reg (or an alias) before encountering any use, it's + // See a def of Reg (or an alias) before encountering any use, it's // trivially dead. return true; @@ -189,7 +198,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, /// instruction does not uses a physical register. bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, - SmallSet &PhysRefs) const { + SmallSet &PhysRefs, + SmallVector &PhysDefs) const{ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -207,7 +217,9 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; PhysRefs.insert(Reg); - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (MO.isDef()) + PhysDefs.push_back(Reg); + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) PhysRefs.insert(*Alias); } @@ -215,25 +227,56 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, } bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet &PhysRefs) const { + SmallSet &PhysRefs, + SmallVector &PhysDefs, + bool &NonLocal) const { // For now conservatively returns false if the common subexpression is - // not in the same basic block as the given instruction. - MachineBasicBlock *MBB = MI->getParent(); - if (CSMI->getParent() != MBB) - return false; + // not in the same basic block as the given instruction. The only exception + // is if the common subexpression is in the sole predecessor block. + const MachineBasicBlock *MBB = MI->getParent(); + const MachineBasicBlock *CSMBB = CSMI->getParent(); + + bool CrossMBB = false; + if (CSMBB != MBB) { + if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB) + return false; + + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { + if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i])) + // Avoid extending live range of physical registers if they are + //allocatable or reserved. + return false; + } + CrossMBB = true; + } MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); MachineBasicBlock::const_iterator E = MI; + MachineBasicBlock::const_iterator EE = CSMBB->end(); unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. 
- while (I != E && I->isDebugValue()) + while (I != E && I != EE && I->isDebugValue()) ++I; + if (I == EE) { + assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); + (void)CrossMBB; + CrossMBB = false; + NonLocal = true; + I = MBB->begin(); + EE = MBB->end(); + continue; + } + if (I == E) return true; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); + // RegMasks go on instructions like calls that clobber lots of physregs. + // Don't attempt to CSE across such an instruction. + if (MO.isRegMask()) + return false; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -260,12 +303,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() || + if (MI->mayStore() || MI->isCall() || MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (MCID.mayLoad()) { + if (MI->mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. @@ -287,7 +329,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (MI->getDesc().isAsCheapAsAMove()) { + if (MI->isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -376,7 +418,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Commute commutable instructions. bool Commuted = false; - if (!FoundCSE && MI->getDesc().isCommutable()) { + if (!FoundCSE && MI->isCommutable()) { MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI) { Commuted = true; @@ -394,16 +436,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. + bool CrossMBBPhysDef = false; SmallSet PhysRefs; - if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { + SmallVector PhysDefs; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; - // ... Unless the CS is local and it also defines the physical register - // which is not clobbered in between and the physical register uses - // were not clobbered. + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) FoundCSE = true; } @@ -458,6 +502,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); MRI->clearKillFlags(CSEPairs[i].second); } + + if (CrossMBBPhysDef) { + // Add physical register defs now coming in from a predecessor to MBB + // livein list. 
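[Annotation] The PhysRegDefsReach change above only lets a physreg-referencing common subexpression be reused across a block boundary when the earlier instruction sits in the block's sole predecessor (and, in the real pass, nothing in between clobbers the registers and the defs are neither allocatable nor reserved). A standalone sketch of just the sole-predecessor shape of that check, with a toy CFG rather than MachineBasicBlocks:

#include <cstdio>
#include <string>
#include <vector>

struct Block {
  std::string Name;
  std::vector<Block*> Preds;
};

// Cross-block reuse is only considered when DefBlock is the unique
// predecessor of UseBlock (mirroring pred_size() == 1 && *pred_begin() ==
// CSMBB above); the real pass additionally scans the instructions in between.
static bool mayReuseAcrossBlocks(const Block &UseBlock, const Block &DefBlock) {
  return UseBlock.Preds.size() == 1 && UseBlock.Preds.front() == &DefBlock;
}

int main() {
  Block Pred{"pred", {}}, Other{"other", {}};
  Block Single{"single", {&Pred}};
  Block Join{"join", {&Pred, &Other}};

  std::printf("single: %s\n", mayReuseAcrossBlocks(Single, Pred) ? "reuse ok" : "no reuse");
  std::printf("join:   %s\n", mayReuseAcrossBlocks(Join, Pred) ? "reuse ok" : "no reuse");
  return 0;
}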
+ while (!PhysDefs.empty()) { + unsigned LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn)) + MBB->addLiveIn(LiveIn); + } + ++NumCrossBBCSEs; + } + MI->eraseFromParent(); ++NumCSEs; if (!PhysRefs.empty()) @@ -542,5 +598,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis(); DT = &getAnalysis(); + AllocatableRegs = TRI->getAllocatableSet(MF); + ReservedRegs = TRI->getReservedRegs(MF); return PerformCSE(DT->getRootNode()); } diff --git a/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp new file mode 100644 index 000000000000..81b49784c052 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineCodeEmitter.h" + +using namespace llvm; + +void MachineCodeEmitter::anchor() { } diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp new file mode 100644 index 000000000000..9730eaacf6e4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -0,0 +1,340 @@ +//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level copy propagation pass. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-cp" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeletes, "Number of dead copies deleted"); + +namespace { + class MachineCopyPropagation : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + BitVector ReservedRegs; + + public: + static char ID; // Pass identification, replacement for typeid + MachineCopyPropagation() : MachineFunctionPass(ID) { + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SmallVector DestList; + typedef DenseMap SourceMap; + + void SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap &AvailCopyMap); + bool CopyPropagateBlock(MachineBasicBlock &MBB); + }; +} +char MachineCopyPropagation::ID = 0; +char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; + +INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", + "Machine Copy Propagation Pass", false, false) + +void +MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap &AvailCopyMap) { + SourceMap::iterator SI = SrcMap.find(Reg); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + SI = SrcMap.find(*AS); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } + } + } + } +} + +static bool NoInterveningSideEffect(const MachineInstr *CopyMI, + const MachineInstr *MI) { + const MachineBasicBlock *MBB = CopyMI->getParent(); + if (MI->getParent() != MBB) + return false; + MachineBasicBlock::const_iterator I = CopyMI; + MachineBasicBlock::const_iterator E = MBB->end(); + MachineBasicBlock::const_iterator E2 = MI; + + ++I; + while (I != E && I != E2) { + if (I->hasUnmodeledSideEffects() || I->isCall() || + I->isTerminator()) + return false; + ++I; + } + return true; +} + +/// isNopCopy - Return true if the specified copy is really a nop. That is +/// if the source of the copy is the same of the definition of the copy that +/// supplied the source. If the source of the copy is a sub-register than it +/// must check the sub-indices match. e.g. 
+/// ecx = mov eax +/// al = mov cl +/// But not +/// ecx = mov eax +/// al = mov ch +static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, + const TargetRegisterInfo *TRI) { + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (Def == SrcSrc) + return true; + if (TRI->isSubRegister(SrcSrc, Def)) { + unsigned SrcDef = CopyMI->getOperand(0).getReg(); + unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); + if (!SubIdx) + return false; + return SubIdx == TRI->getSubRegIndex(SrcDef, Src); + } + + return false; +} + +bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { + SmallSetVector MaybeDeadCopies; // Candidates for deletion + DenseMap AvailCopyMap; // Def -> available copies map + DenseMap CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isCopy()) { + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Def) || + TargetRegisterInfo::isVirtualRegister(Src)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + DenseMap::iterator CI = AvailCopyMap.find(Src); + if (CI != AvailCopyMap.end()) { + MachineInstr *CopyMI = CI->second; + if (!ReservedRegs.test(Def) && + (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) && + isNopCopy(CopyMI, Def, Src, TRI)) { + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX = COPY %EAX + // ... nothing clobbered EAX. + // %EAX = COPY %ECX + // => + // %ECX = COPY %EAX + // + // Also avoid eliminating a copy from reserved registers unless the + // definition is proven not clobbered. e.g. + // %RSP = COPY %RAX + // CALL + // %RAX = COPY %RSP + + // Clear any kills of Def between CopyMI and MI. This extends the + // live range. + for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) + I->clearRegisterKills(Def, TRI); + + MI->eraseFromParent(); + Changed = true; + ++NumDeletes; + continue; + } + } + + // If Src is defined by a previous copy, it cannot be eliminated. + CI = CopyMap.find(Src); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + + // Copy is now a candidate for deletion. + MaybeDeadCopies.insert(MI); + + // If 'Src' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9 = copy %xmm2 + // ... + // %xmm2 = copy %xmm0 + // ... + // %xmm2 = copy %xmm9 + SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + + // Remember Def is defined by the copy. + // ... Make sure to clear the def maps of aliases first. + for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + CopyMap[Def] = MI; + AvailCopyMap[Def] = MI; + for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) { + CopyMap[*SR] = MI; + AvailCopyMap[*SR] = MI; + } + + // Remember source that's copied to Def. Once it's clobbered, then + // it's no longer available for copy propagation. + if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == + SrcMap[Src].end()) { + SrcMap[Src].push_back(Def); + } + + continue; + } + + // Not a copy. 
+ SmallVector Defs; + int RegMaskOpNum = -1; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + RegMaskOpNum = i; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + if (MO.isDef()) { + Defs.push_back(Reg); + continue; + } + + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + DenseMap::iterator CI = CopyMap.find(Reg); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CI = CopyMap.find(*AS); + if (CI != CopyMap.end()) + MaybeDeadCopies.remove(CI->second); + } + } + + // The instruction has a register mask operand which means that it clobbers + // a large set of registers. It is possible to use the register mask to + // prune the available copies, but treat it like a basic block boundary for + // now. + if (RegMaskOpNum >= 0) { + // Erase any MaybeDeadCopies whose destination register is clobbered. + const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum); + for (SmallSetVector::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + unsigned Reg = (*DI)->getOperand(0).getReg(); + if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg)) + continue; + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + + // Clear all data structures as if we were beginning a new basic block. + MaybeDeadCopies.clear(); + AvailCopyMap.clear(); + CopyMap.clear(); + SrcMap.clear(); + continue; + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + + // No longer defined by a copy. + CopyMap.erase(Reg); + AvailCopyMap.erase(Reg); + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + CopyMap.erase(*AS); + AvailCopyMap.erase(*AS); + } + + // If 'Reg' is previously source of a copy, it is no longer available for + // copy propagation. + SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); + } + } + + // If MBB doesn't have successors, delete the copies whose defs are not used. + // If MBB does have successors, then conservative assume the defs are live-out + // since we don't want to trust live-in lists. 
+ if (MBB.succ_empty()) { + for (SmallSetVector::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) { + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + } + } + + return Changed; +} + +bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + TRI = MF.getTarget().getRegisterInfo(); + ReservedRegs = TRI->getReservedRegs(MF); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= CopyPropagateBlock(*I); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 20066a067b8f..d8c2f6a2eaef 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -13,12 +13,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Config/config.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -28,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" @@ -197,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo); + TBAAInfo, Ranges); } MachineMemOperand * @@ -286,7 +285,13 @@ void MachineFunction::dump() const { } void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ":\n"; + OS << "# Machine code for function " << Fn->getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); @@ -335,7 +340,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getNameStr() + "' function"; + return "CFG for '" + F->getFunction()->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -368,7 +373,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr()); + ViewGraph(this, "mf" + getFunction()->getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -378,7 +383,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr(), true); + ViewGraph(this, "mf" + getFunction()->getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -464,7 +469,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { if (!isCalleeSavedInfoValid()) return BV; - for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); // The entry MBB always has all CSRs pristine. @@ -532,6 +537,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return 8; case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: @@ -539,8 +546,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { case MachineJumpTableInfo::EK_Inline: return 0; } - assert(0 && "Unknown jump table encoding!"); - return ~0; + llvm_unreachable("Unknown jump table encoding!"); } /// getEntryAlignment - Return the alignment of each entry in the jump table. @@ -551,6 +557,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return TD.getABIIntegerTypeAlignment(64); case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: case MachineJumpTableInfo::EK_Custom32: @@ -558,8 +566,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { case MachineJumpTableInfo::EK_Inline: return 1; } - assert(0 && "Unknown jump table encoding!"); - return ~0; + llvm_unreachable("Unknown jump table encoding!"); } /// createJumpTableIndex - Create a new jump table entry in the jump table info. 
@@ -619,6 +626,8 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } // MachineConstantPool implementation //===----------------------------------------------------------------------===// +void MachineConstantPoolValue::anchor() { } + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -653,35 +662,37 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // reject them. if (A->getType() == B->getType()) return false; + // We can't handle structs or arrays. + if (isa(A->getType()) || isa(A->getType()) || + isa(B->getType()) || isa(B->getType())) + return false; + // For now, only support constants with the same size. - if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType())) + uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); + if (StoreSize != TD->getTypeStoreSize(B->getType()) || + StoreSize > 128) return false; - // If a floating-point value and an integer value have the same encoding, - // they can share a constant-pool entry. - if (const ConstantFP *AFP = dyn_cast(A)) - if (const ConstantInt *BI = dyn_cast(B)) - return AFP->getValueAPF().bitcastToAPInt() == BI->getValue(); - if (const ConstantFP *BFP = dyn_cast(B)) - if (const ConstantInt *AI = dyn_cast(A)) - return BFP->getValueAPF().bitcastToAPInt() == AI->getValue(); + Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); - // Two vectors can share an entry if each pair of corresponding - // elements could. - if (const ConstantVector *AV = dyn_cast(A)) - if (const ConstantVector *BV = dyn_cast(B)) { - if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) - return false; - for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) - if (!CanShareConstantPoolEntry(AV->getOperand(i), - BV->getOperand(i), TD)) - return false; - return true; - } - - // TODO: Handle other cases. - - return false; + // Try constant folding a bitcast of both instructions to an integer. If we + // get two identical ConstantInt's, then we are good to share them. We use + // the constant folding APIs to do this so that we get the benefit of + // TargetData. 
+ if (isa(A->getType())) + A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast(A), TD); + else if (A->getType() != IntTy) + A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast(A), TD); + if (isa(B->getType())) + B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast(B), TD); + else if (B->getType() != IntTy) + B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast(B), TD); + + return A == B; } /// getConstantPoolIndex - Create a new entry in the constant pool or return diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp index 054c750c9f2b..35591e1649d3 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -19,9 +19,8 @@ using namespace llvm; char MachineFunctionAnalysis::ID = 0; -MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, - CodeGenOpt::Level OL) : - FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : + FunctionPass(ID), TM(tm), MF(0) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index a240667f7d6a..e553a0463a2a 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -178,6 +179,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsKill = isKill; IsDead = isDead; IsUndef = isUndef; + IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; SubReg = 0; @@ -191,7 +193,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return false; switch (getType()) { - default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: return getReg() == Other.getReg() && isDef() == Other.isDef() && getSubReg() == Other.getSubReg(); @@ -216,11 +217,14 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress(); + case MO_RegisterMask: + return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); } + llvm_unreachable("Invalid machine operand type"); } /// print - Print the specified machine operand. 
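The rewritten CanShareConstantPoolEntry in the MachineFunction.cpp hunk above replaces the per-type FP/integer/vector comparisons with a single rule: reject aggregates, require the same store size, fold both constants to an integer of that width, and share the entry only when the folded values are identical, i.e. when the two constants have the same in-memory encoding. A self-contained sketch of that idea follows; it models the check with memcmp on host values (assuming IEEE-754 doubles) instead of the constant-folding APIs the real code uses, so it is illustrative only.

// Sketch: two constants of different types can share one constant-pool slot
// when their in-memory encodings are bit-identical. The real pass folds both
// constants to an integer of the common store size so TargetData rules apply;
// here plain host objects and memcmp stand in for that machinery.
#include <cassert>
#include <cstdint>
#include <cstring>

template <typename A, typename B>
static bool sameEncoding(const A &a, const B &b) {
  if (sizeof(A) != sizeof(B))            // "only support constants with the same size"
    return false;
  return std::memcmp(&a, &b, sizeof(A)) == 0;
}

int main() {
  // Assuming IEEE-754: double 2.0 and the i64 constant 0x4000000000000000
  // have identical bytes, so they could share a pool entry.
  double d = 2.0;
  uint64_t bits = UINT64_C(0x4000000000000000);
  assert(sameEncoding(d, bits));

  // Same size but different bytes: no sharing.
  uint64_t other = 3;
  assert(!sameEncoding(d, other));
  return 0;
}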
@@ -240,7 +244,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isEarlyClobber()) { + isInternalRead() || isEarlyClobber()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -256,14 +260,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || isUndef()) { + if (isKill() || isDead() || isUndef() || isInternalRead()) { if (NeedComma) OS << ','; - if (isKill()) OS << "kill"; - if (isDead()) OS << "dead"; + NeedComma = false; + if (isKill()) { + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + OS << "dead"; + NeedComma = true; + } if (isUndef()) { - if (isKill() || isDead()) - OS << ','; + if (NeedComma) OS << ','; OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; } } OS << '>'; @@ -311,6 +327,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false); OS << '>'; break; + case MachineOperand::MO_RegisterMask: + OS << ""; + break; case MachineOperand::MO_Metadata: OS << '<'; WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); @@ -319,8 +338,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_MCSymbol: OS << "'; break; - default: - llvm_unreachable("Unrecognized operand type"); } if (unsigned TF = getTargetFlags()) @@ -364,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo) + const MDNode *TBAAInfo, + const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo) { + TBAAInfo(TBAAInfo), Ranges(Ranges) { assert((PtrInfo.V == 0 || isa(PtrInfo.V->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); @@ -465,7 +483,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MCID NULL and no operands. MachineInstr::MachineInstr() : MCID(0), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), + NumMemRefs(0), MemRefs(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -473,10 +491,10 @@ MachineInstr::MachineInstr() void MachineInstr::addImplicitDefUseOperands() { if (MCID->ImplicitDefs) - for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) + for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) + for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -485,7 +503,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// the MCInstrDesc. 
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -500,7 +518,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -516,7 +534,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -532,7 +550,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -547,7 +565,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), - MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -722,17 +740,33 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { mmo_iterator OldMemRefs = MemRefs; - mmo_iterator OldMemRefsEnd = MemRefsEnd; + uint16_t OldNumMemRefs = NumMemRefs; - size_t NewNum = (MemRefsEnd - MemRefs) + 1; + uint16_t NewNum = NumMemRefs + 1; mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); - mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; - std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); NewMemRefs[NewNum - 1] = MO; MemRefs = NewMemRefs; - MemRefsEnd = NewMemRefsEnd; + NumMemRefs = NewNum; +} + +bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator MII = *this; ++MII; + while (MII != MBB->end() && MII->isInsideBundle()) { + if (MII->getDesc().getFlags() & Mask) { + if (Type == AnyInBundle) + return true; + } else { + if (Type == AllInBundle) + return false; + } + ++MII; + } + + return Type == AllInBundle; } bool MachineInstr::isIdenticalTo(const MachineInstr *Other, @@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, Other->getNumOperands() != getNumOperands()) return false; + if (isBundle()) { + // Both instructions are bundles, compare MIs inside the bundle. 
+ MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); + while (++I1 != E1 && I1->isInsideBundle()) { + ++I2; + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + return false; + } + } + // Check operands to make sure they match. for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, /// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); + + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->remove(MI); + } + } getParent()->remove(this); return this; } @@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() { /// block, and deletes it. void MachineInstr::eraseFromParent() { assert(getParent() && "Not embedded in a basic block!"); + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->erase(MI); + } + } getParent()->erase(this); } @@ -817,6 +887,16 @@ unsigned MachineInstr::getNumExplicitOperands() const { return NumOperands; } +/// isBundled - Return true if this instruction part of a bundle. This is true +/// if either itself or its following instruction is marked "InsideBundle". +bool MachineInstr::isBundled() const { + if (isInsideBundle()) + return true; + MachineBasicBlock::const_instr_iterator nextMI = this; + ++nextMI; + return nextMI != Parent->instr_end() && nextMI->isInsideBundle(); +} + bool MachineInstr::isStackAligningInlineAsm() const { if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -887,6 +967,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } +/// getBundleSize - Return the number of instructions inside the MI bundle. +unsigned MachineInstr::getBundleSize() const { + assert(isBundle() && "Expecting a bundle"); + + MachineBasicBlock::const_instr_iterator I = *this; + unsigned Size = 0; + while ((++I)->isInsideBundle()) { + ++Size; + } + assert(Size > 1 && "Malformed bundle"); + + return Size; +} + /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -948,6 +1042,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); + // Accept regmask operands when Overlap is set. + // Ignore them when looking for a specific def operand (Overlap == false). 
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg)) + return i; if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); @@ -1118,6 +1216,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { /// copyPredicates - Copies predicate operand(s) from MI. void MachineInstr::copyPredicates(const MachineInstr *MI) { + assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isPredicable()) return; @@ -1159,13 +1259,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (MCID->mayStore() || MCID->isCall()) { + if (mayStore() || isCall()) { SawStore = true; return false; } if (isLabel() || isDebugValue() || - MCID->isTerminator() || hasUnmodeledSideEffects()) + isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the @@ -1173,7 +1273,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (MCID->mayLoad() && !isInvariantLoad(AA)) + if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1213,9 +1313,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, /// have no volatile memory references. bool MachineInstr::hasVolatileMemoryRef() const { // An instruction known never to access memory won't have a volatile access. - if (!MCID->mayStore() && - !MCID->mayLoad() && - !MCID->isCall() && + if (!mayStore() && + !mayLoad() && + !isCall() && !hasUnmodeledSideEffects()) return false; @@ -1239,7 +1339,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. - if (!MCID->mayLoad()) + if (!mayLoad()) return false; // If the instruction has lost its memoperands, conservatively assume that @@ -1253,6 +1353,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { E = memoperands_end(); I != E; ++I) { if ((*I)->isVolatile()) return false; if ((*I)->isStore()) return false; + if ((*I)->isInvariant()) return true; if (const Value *V = (*I)->getValue()) { // A load from a constant PseudoSourceValue is invariant. @@ -1291,7 +1392,7 @@ unsigned MachineInstr::isConstantValuePHI() const { } bool MachineInstr::hasUnmodeledSideEffects() const { - if (getDesc().hasUnmodeledSideEffects()) + if (hasProperty(MCID::UnmodeledSideEffects)) return true; if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1384,7 +1485,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " = "; // Print the opcode name. - OS << getDesc().getName(); + if (TM && TM->getInstrInfo()) + OS << TM->getInstrInfo()->getName(getOpcode()); + else + OS << "UNKNOWN"; // Print the rest of the operands. bool OmittedAnyCallClobbers = false; @@ -1419,14 +1523,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // call instructions much less noisy on targets where calls clobber lots // of registers. 
Don't rely on MO.isDead() because we may be called before // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MF && getDesc().isCall() && + if (MF && isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; - for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg); unsigned AliasReg = *Alias; ++Alias) if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { HasAliasLive = true; @@ -1617,6 +1721,20 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, return Found; } +void MachineInstr::clearRegisterKills(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + RegInfo = 0; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned OpReg = MO.getReg(); + if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg))) + MO.setIsKill(false); + } +} + bool MachineInstr::addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { @@ -1689,16 +1807,21 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsImp*/)); } -void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl &UsedRegs, +void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, const TargetRegisterInfo &TRI) { + bool HasRegMask = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); + if (MO.isRegMask()) { + HasRegMask = true; + continue; + } if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0) continue; + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; bool Dead = true; - for (SmallVectorImpl::const_iterator I = UsedRegs.begin(), - E = UsedRegs.end(); I != E; ++I) + for (ArrayRef::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) if (TRI.regsOverlap(*I, Reg)) { Dead = false; break; @@ -1706,53 +1829,66 @@ void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl &UsedRe // If there are no uses, including partial uses, the def is dead. if (Dead) MO.setIsDead(); } + + // This is a call with a register mask operand. + // Mask clobbers are always dead, so add defs for the non-dead defines. + if (HasRegMask) + for (ArrayRef::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) + addRegisterDefined(*I, &TRI); } unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { - unsigned Hash = MI->getOpcode() * 37; + // Build up a buffer of hash code components. + // + // FIXME: This is a total hack. We should have a hash_value overload for + // MachineOperand, but currently that doesn't work because there are many + // different ideas of "equality" and thus different sets of information that + // contribute to the hash code. This one happens to want to take a specific + // subset. And it's still not clear that this routine uses the *correct* + // subset of information when computing the hash code. The goal is to use the + // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to + // test for equality when passed the 'IgnoreVRegDefs' filter flag. 
It would + // be very useful to factor the selection of relevant inputs out of the two + // functions and into a common routine, but it's not clear how that can be + // done. + SmallVector HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - uint64_t Key = (uint64_t)MO.getType() << 32; switch (MO.getType()) { default: break; case MachineOperand::MO_Register: if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. - Key |= MO.getReg(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); break; case MachineOperand::MO_Immediate: - Key |= MO.getImm(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); break; case MachineOperand::MO_FrameIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_JumpTableIndex: - Key |= MO.getIndex(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); break; case MachineOperand::MO_MachineBasicBlock: - Key |= DenseMapInfo::getHashValue(MO.getMBB()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); break; case MachineOperand::MO_GlobalAddress: - Key |= DenseMapInfo::getHashValue(MO.getGlobal()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); break; case MachineOperand::MO_BlockAddress: - Key |= DenseMapInfo::getHashValue(MO.getBlockAddress()); + HashComponents.push_back(hash_combine(MO.getType(), + MO.getBlockAddress())); break; case MachineOperand::MO_MCSymbol: - Key |= DenseMapInfo::getHashValue(MO.getMCSymbol()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); break; } - Key += ~(Key << 32); - Key ^= (Key >> 22); - Key += ~(Key << 13); - Key ^= (Key >> 8); - Key += (Key << 3); - Key ^= (Key >> 15); - Key += ~(Key << 27); - Key ^= (Key >> 31); - Hash = (unsigned)Key + Hash * 37; } - return Hash; + return hash_combine_range(HashComponents.begin(), HashComponents.end()); } void MachineInstr::emitError(StringRef Msg) const { diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp new file mode 100644 index 000000000000..73489a7160bf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -0,0 +1,278 @@ +//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +namespace { + class UnpackMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + UnpackMachineBundles() : MachineFunctionPass(ID) { + initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char UnpackMachineBundles::ID = 0; +char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID; +INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", + "Unpack machine instruction bundles", false, false) + +bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), + MIE = MBB->instr_end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // Remove BUNDLE instruction and the InsideBundle flags from bundled + // instructions. + if (MI->isBundle()) { + while (++MII != MIE && MII->isInsideBundle()) { + MII->setIsInsideBundle(false); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + } + MI->eraseFromParent(); + + Changed = true; + continue; + } + + ++MII; + } + } + + return Changed; +} + + +namespace { + class FinalizeMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + FinalizeMachineBundles() : MachineFunctionPass(ID) { + initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char FinalizeMachineBundles::ID = 0; +char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID; +INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles", + "Finalize machine instruction bundles", false, false) + +bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { + return llvm::finalizeBundles(MF); +} + + +/// finalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (exclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. 
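The doc comment above describes what finalizeBundle computes for the BUNDLE header. A toy model of that def/use bookkeeping is sketched here (plain integers for registers, no kill/dead/undef or sub-register handling, and not the LLVM API); the real implementation follows it.

// Sketch: a register read that was defined earlier in the same bundle is an
// "internal read"; every other use, and every def, is externally visible and
// is copied onto the bundle header. Defs are assumed to live out of the bundle.
#include <cstdio>
#include <set>
#include <vector>

struct ToyInstr {
  std::vector<int> defs;   // registers written
  std::vector<int> uses;   // registers read
};

struct ToyBundleHeader {
  std::set<int> externalDefs;  // defs visible after the bundle
  std::set<int> externalUses;  // uses of values produced before the bundle
};

static ToyBundleHeader finalize(const std::vector<ToyInstr> &bundle) {
  ToyBundleHeader hdr;
  std::set<int> localDefs;
  for (const ToyInstr &mi : bundle) {
    for (int r : mi.uses)
      if (!localDefs.count(r))       // not produced inside: externally visible
        hdr.externalUses.insert(r);
    for (int r : mi.defs) {
      localDefs.insert(r);
      hdr.externalDefs.insert(r);    // assume the def is live out of the bundle
    }
  }
  return hdr;
}

int main() {
  // r1 = r2 + r3 ; r4 = r1 * r5  -- r1 is an internal read; r2, r3, r5 are not.
  ToyBundleHeader h = finalize({{{1}, {2, 3}}, {{4}, {1, 5}}});
  std::printf("external uses: %zu, defs: %zu\n",
              h.externalUses.size(), h.externalDefs.size());  // prints 3, 2
  return 0;
}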
+void llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + assert(FirstMI != LastMI && "Empty bundle?"); + + const TargetMachine &TM = MBB.getParent()->getTarget(); + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), + TII->get(TargetOpcode::BUNDLE)); + + SmallVector LocalDefs; + SmallSet LocalDefSet; + SmallSet DeadDefSet; + SmallSet KilledDefSet; + SmallVector ExternUses; + SmallSet ExternUseSet; + SmallSet KilledUseSet; + SmallSet UndefUseSet; + SmallVector Defs; + for (; FirstMI != LastMI; ++FirstMI) { + for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = FirstMI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(&MO); + continue; + } + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { + MO.setIsInternalRead(); + if (MO.isKill()) + // Internal def is now killed. + KilledDefSet.insert(Reg); + } else { + if (ExternUseSet.insert(Reg)) { + ExternUses.push_back(Reg); + if (MO.isUndef()) + UndefUseSet.insert(Reg); + } + if (MO.isKill()) + // External def is now killed. + KilledUseSet.insert(Reg); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + MachineOperand &MO = *Defs[i]; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (LocalDefSet.insert(Reg)) { + LocalDefs.push_back(Reg); + if (MO.isDead()) { + DeadDefSet.insert(Reg); + } + } else { + // Re-defined inside the bundle, it's no longer killed. + KilledDefSet.erase(Reg); + if (!MO.isDead()) + // Previously defined but dead. + DeadDefSet.erase(Reg); + } + + if (!MO.isDead()) { + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (LocalDefSet.insert(SubReg)) + LocalDefs.push_back(SubReg); + } + } + } + + FirstMI->setIsInsideBundle(); + Defs.clear(); + } + + SmallSet Added; + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + if (Added.insert(Reg)) { + // If it's not live beyond end of the bundle, mark it dead. + bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); + MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | + getImplRegState(true)); + } + } + + for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { + unsigned Reg = ExternUses[i]; + bool isKill = KilledUseSet.count(Reg); + bool isUndef = UndefUseSet.count(Reg); + MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | + getImplRegState(true)); + } +} + +/// finalizeBundle - Same functionality as the previous finalizeBundle except +/// the last instruction in the bundle is not provided as an input. This is +/// used in cases where bundles are pre-determined by marking instructions +/// with 'InsideBundle' marker. It returns the MBB instruction iterator that +/// points to the end of the bundle. +MachineBasicBlock::instr_iterator +llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI) { + MachineBasicBlock::instr_iterator E = MBB.instr_end(); + MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI); + while (LastMI != E && LastMI->isInsideBundle()) + ++LastMI; + finalizeBundle(MBB, FirstMI, LastMI); + return LastMI; +} + +/// finalizeBundles - Finalize instruction bundles in the specified +/// MachineFunction. 
Return true if any bundles are finalized. +bool llvm::finalizeBundles(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &MBB = *I; + + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); + assert(!MII->isInsideBundle() && + "First instr cannot be inside bundle before finalization!"); + + MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; + for (++MII; MII != MIE; ) { + if (!MII->isInsideBundle()) + ++MII; + else { + MII = finalizeBundle(MBB, llvm::prior(MII)); + Changed = true; + } + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// MachineOperand iterator +//===----------------------------------------------------------------------===// + +MachineOperandIteratorBase::RegInfo +MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, + SmallVectorImpl > *Ops) { + RegInfo RI = { false, false, false }; + for(; isValid(); ++*this) { + MachineOperand &MO = deref(); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + // Remember each (MI, OpNo) that refers to Reg. + if (Ops) + Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + + // Both defs and uses can read virtual registers. + if (MO.readsReg()) { + RI.Reads = true; + if (MO.isDef()) + RI.Tied = true; + } + + // Only defs can write. + if (MO.isDef()) + RI.Writes = true; + else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + RI.Tied = true; + } + return RI; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index a1f80d5282e0..8c562cc4454a 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -45,7 +45,7 @@ using namespace llvm; static cl::opt AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -60,8 +60,6 @@ STATISTIC(NumPostRAHoisted, namespace { class MachineLICM : public MachineFunctionPass { - bool PreRegAlloc; - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetLowering *TLI; @@ -69,6 +67,7 @@ namespace { const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; const InstrItineraryData *InstrItins; + bool PreRegAlloc; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. @@ -81,7 +80,13 @@ namespace { MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. - BitVector AllocatableSet; + // Exit blocks for CurLoop. + SmallVector ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } // Track 'estimated' register pressure. SmallSet RegSeen; @@ -122,8 +127,6 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); - const char *getPassName() const { return "Machine Instruction LICM"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); @@ -165,7 +168,9 @@ namespace { /// ProcessMI - Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. 
- void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, + void ProcessMI(MachineInstr *MI, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet &StoredFIs, SmallVector &Candidates); @@ -182,12 +187,12 @@ namespace { /// invariant. I.e., all virtual register operands are defined outside of /// the loop, physical registers aren't accessed (explicitly or implicitly), /// and the instruction is hoistable. - /// + /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -200,7 +205,7 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap &Cost); + bool CanCauseHighRegPressure(DenseMap &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -215,13 +220,25 @@ namespace { /// If not then a load from this mbb may not be safe to hoist. bool IsGuaranteedToExecute(MachineBasicBlock *BB); - /// HoistRegion - Walk the specified region of the CFG (defined by all - /// blocks dominated by the specified block, and that are in the current - /// loop) in depth first order w.r.t the DominatorTree. This allows us to - /// visit definitions before uses, allowing us to hoist a loop body in one - /// pass without iteration. + void EnterScope(MachineBasicBlock *MBB); + + void ExitScope(MachineBasicBlock *MBB); + + /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given + /// dominator tree node if its a leaf or all of its children are done. Walk + /// up the dominator tree to destroy ancestors which are now done. + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap); + + /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all + /// blocks dominated by the specified header block, and that are in the + /// current loop) in depth first order w.r.t the DominatorTree. This allows + /// us to visit definitions before uses, allowing us to hoist a loop body in + /// one pass without iteration. 
/// - void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); + void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); /// getRegisterClassIDAndCost - For a given MI, register, and the operand /// index, return the ID and cost of its representative register class by @@ -278,6 +295,7 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; +char &llvm::MachineLICMID = MachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -286,10 +304,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { - return new MachineLICM(PreRegAlloc); -} - /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most /// loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { @@ -305,12 +319,6 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { } bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - if (PreRegAlloc) - DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); - else - DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); - Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); @@ -319,7 +327,14 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); InstrItins = TM->getInstrItineraryData(); - AllocatableSet = TRI->getAllocatableSet(MF); + + PreRegAlloc = MRI->isSSA(); + + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. @@ -341,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -349,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -356,7 +374,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); FirstInLoop = true; - HoistRegion(N, true); + HoistOutOfLoop(N); CSEMap.clear(); } } @@ -383,7 +401,8 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { /// ProcessMI - Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, - unsigned *PhysRegDefs, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet &StoredFIs, SmallVector &Candidates) { bool RuledOut = false; @@ -402,6 +421,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, continue; } + // We can't hoist an instruction defining a physreg that is clobbered in + // the loop. 
+ if (MO.isRegMask()) { + PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); + continue; + } + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -411,7 +437,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, "Not expecting virtual register!"); if (!MO.isDef()) { - if (Reg && PhysRegDefs[Reg]) + if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) // If it's using a non-loop-invariant register, then it's obviously not // safe to hoist. HasNonInvariantUse = true; @@ -419,9 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } if (MO.isImplicit()) { - ++PhysRegDefs[Reg]; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - ++PhysRegDefs[*AS]; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + PhysRegClobbers.set(*AS); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -438,14 +463,17 @@ void MachineLICM::ProcessMI(MachineInstr *MI, Def = Reg; // If we have already seen another instruction that defines the same - // register, then this is not safe. - if (++PhysRegDefs[Reg] > 1) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - if (++PhysRegDefs[*AS] > 1) + // register, then this is not safe. Two defs is indicated by setting a + // PhysRegClobbers bit. + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { + if (PhysRegDefs.test(*AS)) + PhysRegClobbers.set(*AS); + if (PhysRegClobbers.test(*AS)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. RuledOut = true; + PhysRegDefs.set(*AS); + } } // Only consider reloads for now and remats which do not have register @@ -461,9 +489,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop /// invariants out to the preheader. void MachineLICM::HoistRegionPostRA() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + unsigned NumRegs = TRI->getNumRegs(); - unsigned *PhysRegDefs = new unsigned[NumRegs]; - std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); + BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. + BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. SmallVector Candidates; SmallSet StoredFIs; @@ -485,16 +517,31 @@ void MachineLICM::HoistRegionPostRA() { for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; - ++PhysRegDefs[Reg]; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - ++PhysRegDefs[*AS]; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + PhysRegDefs.set(*AS); } SpeculationState = SpeculateUnknown; for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ++MII) { MachineInstr *MI = &*MII; - ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); + ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); + } + } + + // Gather the registers read / clobbered by the terminator. 
+ BitVector TermRegs(NumRegs); + MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); + if (TI != Preheader->end()) { + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = TI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + TermRegs.set(*AS); } } @@ -503,19 +550,25 @@ void MachineLICM::HoistRegionPostRA() { // instruction in the loop. // 2. If the candidate is a load from stack slot (always true for now), // check if the slot is stored anywhere in the loop. + // 3. Make sure candidate def should not clobber + // registers read by the terminator. Similarly its def should not be + // clobbered by the terminator. for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { if (Candidates[i].FI != INT_MIN && StoredFIs.count(Candidates[i].FI)) continue; - if (PhysRegDefs[Candidates[i].Def] == 1) { + unsigned Def = Candidates[i].Def; + if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; MachineInstr *MI = Candidates[i].MI; for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - if (PhysRegDefs[MO.getReg()]) { + unsigned Reg = MO.getReg(); + if (PhysRegDefs.test(Reg) || + PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. Safe = false; @@ -526,8 +579,6 @@ void MachineLICM::HoistRegionPostRA() { HoistPostRA(MI, Candidates[i].Def); } } - - delete[] PhysRegDefs; } /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current @@ -556,26 +607,17 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG({ - dbgs() << "Hoisting " << *MI; - if (Preheader->getBasicBlock()) - dbgs() << " to MachineBasicBlock " - << Preheader->getName(); - if (MI->getParent()->getBasicBlock()) - dbgs() << " from MachineBasicBlock " - << MI->getParent()->getName(); - dbgs() << "\n"; - }); + DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" + << MI->getParent()->getNumber() << ": " << *MI); // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); - // Add register to livein list to all the BBs in the current loop since a + // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is // important to ensure later passes do not scavenge the def register. AddToLiveIns(Def); @@ -589,7 +631,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; - + if (BB != CurLoop->getHeader()) { // Check loop exiting blocks. 
SmallVector CurrentLoopExitingBlocks; @@ -605,57 +647,126 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { return true; } -/// HoistRegion - Walk the specified region of the CFG (defined by all blocks -/// dominated by the specified block, and that are in the current loop) in depth -/// first order w.r.t the DominatorTree. This allows us to visit definitions -/// before uses, allowing us to hoist a loop body in one pass without iteration. -/// -void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { - assert(N != 0 && "Null dominator tree node?"); - MachineBasicBlock *BB = N->getBlock(); +void MachineLICM::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); - // If the header of the loop containing this basic block is a landing pad, - // then don't try to hoist instructions out of this loop. - const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) return; + // Remember livein register pressure. + BackTrace.push_back(RegPressure); +} - // If this subregion is not in the top level loop at all, exit. - if (!CurLoop->contains(BB)) return; +void MachineLICM::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + BackTrace.pop_back(); +} - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap) { + if (OpenChildren[Node]) return; - if (IsHeader) { + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all +/// blocks dominated by the specified header block, and that are in the +/// current loop) in depth first order w.r.t the DominatorTree. This allows +/// us to visit definitions before uses, allowing us to hoist a loop body in +/// one pass without iteration. +/// +void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + SmallVector Scopes; + SmallVector WorkList; + DenseMap ParentMap; + DenseMap OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(HeaderN); + do { + MachineDomTreeNode *Node = WorkList.pop_back_val(); + assert(Node != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = Node->getBlock(); + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isLandingPad()) + continue; + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) + continue; + + Scopes.push_back(Node); + const std::vector &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. 
+ if (BB->succ_size() >= 25) + NumChildren = 0; + + OpenChildren[Node] = NumChildren; + // Add children in reverse order as then the next popped worklist node is + // the first child of this node. This means we ultimately traverse the + // DOM tree in exactly the same order as if we'd recursed. + for (int i = (int)NumChildren-1; i >= 0; --i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + if (Scopes.size() != 0) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + // Compute registers which are livein into the loop headers. RegSeen.clear(); BackTrace.clear(); InitRegPressure(Preheader); } - // Remember livein register pressure. - BackTrace.push_back(RegPressure); + // Now perform LICM. + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); - SpeculationState = SpeculateUnknown; - for (MachineBasicBlock::iterator - MII = BB->begin(), E = BB->end(); MII != E; ) { - MachineBasicBlock::iterator NextMII = MII; ++NextMII; - MachineInstr *MI = &*MII; - if (!Hoist(MI, Preheader)) - UpdateRegPressure(MI); - MII = NextMII; + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + continue; + + EnterScope(MBB); + + // Process the block + SpeculationState = SpeculateUnknown; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); + MII = NextMII; + } + + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); } - - // Don't hoist things out of a large switch statement. This often causes - // code to be hoisted that wasn't going to be executed, and increases - // register pressure in a situation where it's likely to matter. - if (BB->succ_size() < 25) { - const std::vector &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) - HoistRegion(Children[I]); - } - - BackTrace.pop_back(); } static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { @@ -670,7 +781,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, unsigned &RCId, unsigned &RCCost) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); EVT VT = *RC->vt_begin(); - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { RCId = RC->getID(); RCCost = 1; } else { @@ -678,7 +789,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, RCCost = TLI->getRepRegClassCostFor(VT); } } - + /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers. @@ -762,6 +873,21 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { } } +/// isLoadFromGOTOrConstantPool - Return true if this machine instruction +/// loads from global offset table or constant pool. 
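The worklist-based walk added above depends on one non-obvious detail: children are pushed in reverse order onto a LIFO worklist, so they pop off in their original order and the visit matches a recursive preorder over the dominator tree. The following is a minimal standalone sketch of that ordering trick, using a toy tree type with invented names; it is an illustration, not code from this patch.

#include <cstdio>
#include <vector>

// Toy stand-in for a dominator-tree node; illustrative only.
struct ToyNode {
  int Id;
  std::vector<ToyNode*> Children;
};

// Iterative preorder walk: pushing children in reverse means the first child
// is popped (and therefore visited) next, matching the recursive visit order.
static void PreorderWalk(ToyNode *Root) {
  std::vector<ToyNode*> WorkList;
  WorkList.push_back(Root);
  while (!WorkList.empty()) {
    ToyNode *N = WorkList.back();
    WorkList.pop_back();
    std::printf("visit %d\n", N->Id);
    for (int i = (int)N->Children.size() - 1; i >= 0; --i)
      WorkList.push_back(N->Children[i]);
  }
}

int main() {
  ToyNode A, B, C;
  A.Id = 0; B.Id = 1; C.Id = 2;
  A.Children.push_back(&B);
  A.Children.push_back(&C);
  PreorderWalk(&A); // prints: visit 0, visit 1, visit 2
  return 0;
}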
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert (MI.mayLoad() && "Expected MI that loads!"); + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), + E = MI.memoperands_end(); I != E; ++I) { + if (const Value *V = (*I)->getValue()) { + if (const PseudoSourceValue *PSV = dyn_cast(V)) + if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + return true; + } + } + return false; +} + /// IsLICMCandidate - Returns true if the instruction may be a suitable /// candidate for LICM. e.g. If the instruction is a call, then it's obviously /// not safe to hoist it. @@ -773,9 +899,12 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // If it is load then check if it is guaranteed to execute by making sure that // it dominates all exiting blocks. If it doesn't, then there is a path out of - // the loop which does not execute this load, so we can't hoist it. + // the loop which does not execute this load, so we can't hoist it. Loads + // from constant memory are not safe to speculate all the time, for example + // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent())) + if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && + !IsGuaranteedToExecute(I.getParent())) return false; return true; @@ -785,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { /// invariant. I.e., all virtual register operands are defined outside of the /// loop, physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. -/// +/// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (!IsLICMCandidate(I)) return false; @@ -806,18 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->def_empty(Reg)) + if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) return false; - if (AllocatableSet.test(Reg)) - return false; - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI->def_empty(AliasReg)) - return false; - if (AllocatableSet.test(AliasReg)) - return false; - } // Otherwise it's safe to move. continue; } else if (!MO.isDead()) { @@ -847,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasAnyPHIUse - Return true if the specified register is used by any -/// phi node. -bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isPHI()) - return true; - // Look pass copies as well. - if (UseMI->isCopy()) { - unsigned Def = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) && - HasAnyPHIUse(Def)) - return true; +/// HasLoopPHIUse - Return true if the specified instruction is used by a +/// phi node and hoisting it could cause a copy to be inserted. 
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector Work(1, MI); + do { + MI = Work.pop_back_val(); + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + // A PHI may cause a copy to be inserted. + if (UseMI->isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -903,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// IsCheapInstruction - Return true if the instruction is marked "cheap" or /// the operand latency between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { - if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + if (MI.isAsCheapAsAMove() || MI.isCopyLike()) return true; if (!InstrItins || InstrItins->isEmpty()) return false; @@ -930,16 +1067,25 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost, + bool CheapInstr) { for (DenseMap::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { - if (CI->second <= 0) + if (CI->second <= 0) continue; unsigned RCId = CI->first; + unsigned Limit = RegLimit[RCId]; + int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { SmallVector &RP = BackTrace[i-1]; - if (RP[RCId] + CI->second >= RegLimit[RCId]) + if (RP[RCId] + Cost >= Limit) return true; } } @@ -999,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. 
- // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; - } + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); - DenseMap::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - DenseMap::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); - } - } - - // Visit BBs from header to current BB, if hoisting this doesn't cause - // high register pressure, then it's safe to proceed. - if (!CanCauseHighRegPressure(Cost)) { - ++NumLowRP; - return true; - } - - // Do not "speculate" in high register pressure situation. If an - // instruction is not guaranteed to be executed in the loop, it's best to be - // conservative. - if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) - return false; - - // High register pressure situation, only hoist if the instruction is going to - // be remat'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; + + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? 
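The cost map built just below follows the accounting described in the comments above: a def adds its register-class cost, a killed use subtracts it, and the hoist is rejected only if some class's net increase would push the recorded pressure to its limit. A rough self-contained sketch of that bookkeeping, with invented names (ClassDelta, ClassState) and no claim to match the pass's exact data structures:

#include <map>

// Invented per-class record: current pressure along one path and the limit.
struct ClassState {
  int Current;
  int Limit;
};

// Net pressure change per register class for one candidate instruction:
// defs add the class cost, killed uses subtract it.
typedef std::map<unsigned, int> ClassDelta;

// True if applying the deltas would reach or exceed some class limit.
// Classes whose net delta is <= 0 can only help, so they are skipped.
static bool WouldExceedLimit(const ClassDelta &Delta,
                             const std::map<unsigned, ClassState> &State) {
  for (ClassDelta::const_iterator I = Delta.begin(), E = Delta.end();
       I != E; ++I) {
    if (I->second <= 0)
      continue;
    std::map<unsigned, ClassState>::const_iterator S = State.find(I->first);
    if (S != State.end() && S->second.Current + I->second >= S->second.Limit)
      return true;
  }
  return false;
}

int main() {
  std::map<unsigned, ClassState> State;
  State[0].Current = 10; State[0].Limit = 12;
  ClassDelta Delta;
  Delta[0] = 3;                                       // a def in class 0 costing 3
  return WouldExceedLimit(Delta, State) ? 0 : 1;      // 10 + 3 >= 12, so it exceeds
}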
+ DenseMap Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) + if (!MO.isReg() || MO.isImplicit()) continue; - if (HasAnyPHIUse(MO.getReg())) - return false; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; + } + Cost[RCId] += RCCost; + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. + Cost[RCId] -= RCCost; + } + } + + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } + + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } + + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; + } + + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; } return true; @@ -1087,7 +1241,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. - if (MI->getDesc().canFoldAsLoad()) + if (MI->canFoldAsLoad()) return 0; // If not, we may be able to unfold a load and hoist that. @@ -1123,8 +1277,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { assert(NewMIs.size() == 2 && "Unfolded a load into multiple instructions!"); MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MI, NewMIs[0]); - MBB->insert(MI, NewMIs[1]); + MachineBasicBlock::iterator Pos = MI; + MBB->insert(Pos, NewMIs[0]); + MBB->insert(Pos, NewMIs[1]); // If unfolding produced a load that wasn't loop-invariant or profitable to // hoist, discard the new instructions and bail. if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { @@ -1180,6 +1335,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, // Replace virtual registers defined by MI by their counterparts defined // by Dup. 
+ SmallVector Defs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -1190,11 +1346,33 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - MRI->clearKillFlags(Dup->getOperand(i).getReg()); + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + Defs.push_back(i); + } + + SmallVector OrigRCs; + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + OrigRCs.push_back(MRI->getRegClass(DupReg)); + + if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { + // Restore old RCs if more than one defs. + for (unsigned j = 0; j != i; ++j) + MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); + return false; } } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + MRI->replaceRegWith(Reg, DupReg); + MRI->clearKillFlags(DupReg); + } + MI->eraseFromParent(); ++NumCSEed; return true; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 80c4854238af..ea98b23c6d57 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -257,7 +257,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, : ImmutablePass(ID), Context(MAI, MRI, MOFI), ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false), - CallsExternalVAFunctionWithFloatingPointArguments(false) { + UsesVAFloatArgument(false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); @@ -268,9 +268,9 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, MachineModuleInfo::MachineModuleInfo() : ImmutablePass(ID), Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { - assert(0 && "This MachineModuleInfo constructor should never be called, MMI " - "should always be explicitly constructed by LLVMTargetMachine"); - abort(); + llvm_unreachable("This MachineModuleInfo constructor should never be called, " + "MMI should always be explicitly constructed by " + "LLVMTargetMachine"); } MachineModuleInfo::~MachineModuleInfo() { @@ -503,8 +503,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap *LPMap) { /// indexes. void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, ArrayRef Sites) { - for (unsigned I = 0, E = Sites.size(); I != E; ++I) - LPadToCallSiteMap[Sym].push_back(Sites[I]); + LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); } /// getTypeIDFor - Return the type id for the specified typeinfo. This is @@ -541,8 +540,7 @@ try_next:; // Add the new filter. 
int FilterID = -(1 + FilterIds.size()); FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); - for (unsigned I = 0, N = TyIds.size(); I != N; ++I) - FilterIds.push_back(TyIds[I]); + FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); FilterEnds.push_back(FilterIds.size()); FilterIds.push_back(0); // terminator return FilterID; @@ -561,13 +559,13 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { const Function* Personality = NULL; // Scan landing pads. If there is at least one non-NULL personality - use it. - for (unsigned i = 0; i != LandingPads.size(); ++i) + for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) if (LandingPads[i].Personality) { Personality = LandingPads[i].Personality; break; } - for (unsigned i = 0; i < Personalities.size(); ++i) { + for (unsigned i = 0, e = Personalities.size(); i < e; ++i) { if (Personalities[i] == Personality) return i; } diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp index 9f4ef1287803..58e067bcb9b2 100644 --- a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp +++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp @@ -16,6 +16,7 @@ using namespace llvm; +void MachinePassRegistryListener::anchor() { } /// Add - Adds a function pass to the registration list. /// diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 266ebf64a3fc..7ea151713a6d 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -18,11 +18,12 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true) { + : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegs.resize(TRI.getNumRegs()); - + UsedPhysRegMask.resize(TRI.getNumRegs()); + // Create the physreg use/def lists. PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); @@ -30,9 +31,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && - "Vreg use list non-empty still?"); + clearVirtRegs(); for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); @@ -76,12 +75,14 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { // Accumulate constraints from all uses. for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { - // TRI doesn't have accurate enough information to model this yet. - if (I.getOperand().getSubReg()) - return false; const TargetRegisterClass *OpRC = I->getRegClassConstraint(I.getOperandNo(), TII, TRI); - if (OpRC) + if (unsigned SubIdx = I.getOperand().getSubReg()) { + if (OpRC) + NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + else + NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + } else if (OpRC) NewRC = TRI->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; @@ -115,6 +116,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ return Reg; } +/// clearVirtRegs - Remove all virtual registers (after physreg assignment). 
+void MachineRegisterInfo::clearVirtRegs() { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && + "Vreg use list non-empty still?"); +#endif + VRegInfo.clear(); +} + /// HandleVRegListReallocation - We just added a virtual register to the /// VRegInfo info list and it reallocated. Update the use/def lists info /// pointers. @@ -150,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - if (!def_empty(Reg)) - return &*def_begin(Reg); - return 0; + def_iterator I = def_begin(Reg); + return !I.atEnd() ? &*I : 0; } bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { @@ -242,18 +252,31 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { - for (int i = UsedPhysRegs.find_first(); i >= 0; - i = UsedPhysRegs.find_next(i)) - for (const unsigned *SS = TRI.getSubRegisters(i); - unsigned SubReg = *SS; ++SS) - if (SubReg > unsigned(i)) - UsedPhysRegs.set(SubReg); -} - #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) I.getOperand().getParent()->dump(); } #endif + +void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { + ReservedRegs = TRI->getReservedRegs(MF); +} + +bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + + // Check if any overlapping register is modified. + for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) + if (!def_empty(*R)) + return false; + + // Check if any overlapping register is allocatable so it may be used later. + if (AllocatableRegs.empty()) + AllocatableRegs = TRI->getAllocatableSet(MF); + for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) + if (AllocatableRegs.test(*R)) + return false; + return true; +} diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 84d6df25397c..070a55704dc5 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, if (BB->empty()) return 0; - MachineBasicBlock::iterator I = BB->front(); + MachineBasicBlock::iterator I = BB->begin(); if (!I->isPHI()) return 0; @@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { return DupPHI; // Otherwise, we do need a PHI: insert one now. - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); @@ -214,7 +214,6 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, } llvm_unreachable("MachineOperand::getParent() failure?"); - return 0; } /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, @@ -311,7 +310,7 @@ class SSAUpdaterTraits { /// Add it into the specified block and return the register. 
static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, MachineSSAUpdater *Updater) { - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, Updater->VRC, Updater->MRI, Updater->TII); diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp new file mode 100644 index 000000000000..1d3241b8cc6b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -0,0 +1,614 @@ +//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" + +#include + +using namespace llvm; + +static cl::opt ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +//===----------------------------------------------------------------------===// +// Machine Instruction Scheduling Pass and Registry +//===----------------------------------------------------------------------===// + +namespace { +/// MachineScheduler runs after coalescing and before register allocation. 
+class MachineScheduler : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineScheduler(); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory() {} + + virtual bool runOnMachineFunction(MachineFunction&); + + virtual void print(raw_ostream &O, const Module* = 0) const; + + static char ID; // Class identification, replacement for typeinfo +}; +} // namespace + +char MachineScheduler::ID = 0; + +char &llvm::MachineSchedulerID = MachineScheduler::ID; + +INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachineScheduler, "misched", + "Machine Instruction Scheduler", false, false) + +MachineScheduler::MachineScheduler() +: MachineFunctionPass(ID) { + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachinePassRegistry MachineSchedRegistry::Registry; + +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. +static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return 0; +} + +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); + +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Top-level MachineScheduler pass driver. +/// +/// Visit blocks in function order. Divide each block into scheduling regions +/// and visit them bottom-up. Visiting regions bottom-up is not required, but is +/// consistent with the DAG builder, which traverses the interior of the +/// scheduling regions bottom-up. +/// +/// This design avoids exposing scheduling boundaries to the DAG builder, +/// simplifying the DAG builder's support for "special" target instructions. +/// At the same time the design allows target schedulers to operate across +/// scheduling boundaries, for example to bundle the boudary instructions +/// without reordering them. This creates complexity, because the target +/// scheduler must update the RegionBegin and RegionEnd positions cached by +/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler +/// design would be to split blocks at scheduling boundaries, but LLVM has a +/// general bias against block splitting purely for implementation simplicity. +bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + // Initialize the context of the pass. + MF = &mf; + MLI = &getAnalysis(); + MDT = &getAnalysis(); + PassConfig = &getAnalysis(); + AA = &getAnalysis(); + + LIS = &getAnalysis(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Select the scheduler, or set the default. 
+  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+  if (Ctor == useDefaultMachineSched) {
+    // Get the default scheduler set by the target.
+    Ctor = MachineSchedRegistry::getDefault();
+    if (!Ctor) {
+      Ctor = createConvergingSched;
+      MachineSchedRegistry::setDefault(Ctor);
+    }
+  }
+  // Instantiate the selected scheduler.
+  OwningPtr Scheduler(Ctor(this));
+
+  // Visit all machine basic blocks.
+  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+       MBB != MBBEnd; ++MBB) {
+
+    Scheduler->startBlock(MBB);
+
+    // Break the block into scheduling regions [I, RegionEnd), and schedule each
+    // region as soon as it is discovered. RegionEnd points to the scheduling
+    // boundary at the bottom of the region. The DAG does not include RegionEnd,
+    // but the region does (i.e. the next RegionEnd is above the previous
+    // RegionBegin). If the current block has no terminator then RegionEnd ==
+    // MBB->end() for the bottom region.
+    //
+    // The Scheduler may insert instructions during either schedule() or
+    // exitRegion(), even for empty regions. So the local iterators 'I' and
+    // 'RegionEnd' are invalid across these calls.
+    unsigned RemainingCount = MBB->size();
+    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+      // Avoid decrementing RegionEnd for blocks with no terminator.
+      if (RegionEnd != MBB->end()
+          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+        --RegionEnd;
+        // Count the boundary instruction.
+        --RemainingCount;
+      }
+
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for(;I != MBB->begin(); --I, --RemainingCount) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+          break;
+      }
+      // Notify the scheduler of the region, even if we may skip scheduling
+      // it. Perhaps it still needs to be bundled.
+      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+
+      // Skip empty scheduling regions (0 or 1 schedulable instructions).
+      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+        // Close the current region. Bundle the terminator if needed.
+        // This invalidates 'RegionEnd' and 'I'.
+        Scheduler->exitRegion();
+        continue;
+      }
+      DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+            else dbgs() << "End";
+            dbgs() << " Remaining: " << RemainingCount << "\n");
+
+      // Schedule a region: possibly reorder instructions.
+      // This invalidates 'RegionEnd' and 'I'.
+      Scheduler->schedule();
+
+      // Close the current region.
+      Scheduler->exitRegion();
+
+      // Scheduling has invalidated the current iterator 'I'. Ask the
+      // scheduler for the top of its scheduled region.
+      RegionEnd = Scheduler->begin();
+    }
+    assert(RemainingCount == 0 && "Instruction count mismatch!");
+    Scheduler->finishBlock();
+  }
+  Scheduler->finalizeSchedule();
+  DEBUG(LIS->print(dbgs()));
+  return true;
+}
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+  // unimplemented
+}
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===// + +namespace { +class ScheduleDAGMI; + +/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected +/// scheduling algorithm. +/// +/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it +/// in ScheduleDAGInstrs.h +class MachineSchedStrategy { +public: + virtual ~MachineSchedStrategy() {} + + /// Initialize the strategy after building the DAG for a new region. + virtual void initialize(ScheduleDAGMI *DAG) = 0; + + /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to + /// schedule the node at the top of the unscheduled region. Otherwise it will + /// be scheduled at the bottom. + virtual SUnit *pickNode(bool &IsTopNode) = 0; + + /// When all predecessor dependencies have been resolved, free this node for + /// top-down scheduling. + virtual void releaseTopNode(SUnit *SU) = 0; + /// When all successor dependencies have been resolved, free this node for + /// bottom-up scheduling. + virtual void releaseBottomNode(SUnit *SU) = 0; +}; +} // namespace + +//===----------------------------------------------------------------------===// +// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals +// preservation. +//===----------------------------------------------------------------------===// + +namespace { +/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules +/// machine instructions while updating LiveIntervals. +class ScheduleDAGMI : public ScheduleDAGInstrs { + AliasAnalysis *AA; + MachineSchedStrategy *SchedImpl; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; +public: + ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), + NumInstrsScheduled(0) {} + + ~ScheduleDAGMI() { + delete SchedImpl; + } + + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } + + /// Implement ScheduleDAGInstrs interface. + void schedule(); + +protected: + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + + void releaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); +}; +} // namespace + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When +/// NumPredsLeft reaches zero, release the successor node. +void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + SchedImpl->releaseTopNode(SuccSU); +} + +/// releaseSuccessors - Call releaseSucc on each of SU's successors. 
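The releaseSucc helper above (and its bottom-up counterpart defined below) is the standard list-scheduling bookkeeping: each node carries a count of not-yet-scheduled predecessors, scheduling a node decrements its neighbours' counters, and a node whose counter reaches zero becomes ready. A toy version of the same counting scheme over a plain adjacency list, with invented names and not the ScheduleDAGMI code:

#include <cstddef>
#include <vector>

// Toy dependence-graph node: successor indices plus a counter of
// predecessors that have not been scheduled yet.
struct DepNode {
  std::vector<std::size_t> Succs;
  unsigned NumPredsLeft;
};

// "Scheduling" node N releases its successors: each successor's counter is
// decremented, and any counter that reaches zero marks a newly ready node.
static void ReleaseSuccessors(std::vector<DepNode> &Graph, std::size_t N,
                              std::vector<std::size_t> &Ready) {
  for (std::size_t i = 0, e = Graph[N].Succs.size(); i != e; ++i) {
    std::size_t SuccIdx = Graph[N].Succs[i];
    if (--Graph[SuccIdx].NumPredsLeft == 0)
      Ready.push_back(SuccIdx);
  }
}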
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + releaseSucc(SU, &*I); + } +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void ScheduleDAGMI::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void ScheduleDAGMI::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Fix RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + RegionBegin = llvm::next(RegionBegin); + BB->splice(InsertPos, BB, MI); + LIS->handleMove(MI); + // Fix RegionBegin if another instruction moves above the first instruction. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool ScheduleDAGMI::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. +void ScheduleDAGMI::schedule() { + buildSchedGraph(AA); + + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + if (ViewMISchedDAGs) viewGraph(); + + SchedImpl->initialize(this); + + // Release edges from the special Entry node or to the special Exit node. + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + // Release all DAG roots for scheduling. + for (std::vector::iterator I = SUnits.begin(), E = SUnits.end(); + I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + SchedImpl->releaseBottomNode(&(*I)); + } + + CurrentTop = RegionBegin; + CurrentBottom = RegionEnd; + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction:\n"; SU->dump(this)); + if (!checkSchedLimit()) + break; + + // Move the instruction to its new location in the instruction stream. + MachineInstr *MI = SU->getInstr(); + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + ++CurrentTop; + else + moveInstruction(MI, CurrentTop); + // Release dependent instructions for scheduling. 
+ releaseSuccessors(SU); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + if (&*llvm::prior(CurrentBottom) == MI) + --CurrentBottom; + else { + if (&*CurrentTop == MI) + CurrentTop = llvm::next(CurrentTop); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Release dependent instructions for scheduling. + releasePredecessors(SU); + } + SU->isScheduled = true; + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); +} + +//===----------------------------------------------------------------------===// +// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { + ScheduleDAGMI *DAG; + + unsigned NumTopReady; + unsigned NumBottomReady; + +public: + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + } + + virtual SUnit *pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) + return NULL; + + // As an initial placeholder heuristic, schedule in the direction that has + // the fewest choices. + SUnit *SU; + if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) { + SU = DAG->getSUnit(DAG->top()); + IsTopNode = true; + } + else { + SU = DAG->getSUnit(llvm::prior(DAG->bottom())); + IsTopNode = false; + } + if (SU->isTopReady()) { + assert(NumTopReady > 0 && "bad ready count"); + --NumTopReady; + } + if (SU->isBottomReady()) { + assert(NumBottomReady > 0 && "bad ready count"); + --NumBottomReady; + } + return SU; + } + + virtual void releaseTopNode(SUnit *SU) { + ++NumTopReady; + } + virtual void releaseBottomNode(SUnit *SU) { + ++NumBottomReady; + } +}; +} // namespace + +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new ConvergingScheduler()); +} +static MachineSchedRegistry +ConvergingSchedRegistry("converge", "Standard converging scheduler.", + createConvergingSched); + +//===----------------------------------------------------------------------===// +// Machine Instruction Shuffler for Correctness Testing +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template +struct SUnitOrder { + bool operator()(SUnit *A, SUnit *B) const { + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; + } +}; + +/// Reorder instructions as much as possible. +class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue, SUnitOrder > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. 
+ PriorityQueue, SUnitOrder > + BottomQ; +public: + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} + + virtual void initialize(ScheduleDAGMI *) { + TopQ.clear(); + BottomQ.clear(); + } + + /// Implement MachineSchedStrategy interface. + /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return NULL; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return NULL; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; + return SU; + } + + virtual void releaseTopNode(SUnit *SU) { + TopQ.push(SU); + } + virtual void releaseBottomNode(SUnit *SU) { + BottomQ.push(SU); + } +}; +} // namespace + +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); +} +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); +#endif // !NDEBUG diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 29cfb49953b9..1ce546b578ad 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -32,7 +32,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt +static cl::opt SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); @@ -90,12 +90,19 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo); + bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; +char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -104,8 +111,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking", false, false) -FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } - bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { if (!MI->isCopy()) @@ -147,14 +152,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); + // Ignore debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) return true; - // Ignoring debug uses is necessary so debug info doesn't affect the code. - // This may leave a referencing dbg_value in the original block, before - // the definition of the vreg. Dwarf generator handles this although the - // user might not get the right info at runtime. 
- // BreakPHIEdge is true if all the uses are in the successor MBB being sunken // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. @@ -291,7 +292,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, if (!CEBCandidates.insert(std::make_pair(From, To))) return true; - if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove()) + if (!MI->isCopy() && !MI->isAsCheapAsAMove()) return true; // MI is cheap, we probably don't want to break the critical edge for it. @@ -382,9 +383,9 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence(); } -/// collectDebgValues - Scan instructions following MI and collect any +/// collectDebgValues - Scan instructions following MI and collect any /// matching DBG_VALUEs. -static void collectDebugValues(MachineInstr *MI, +static void collectDebugValues(MachineInstr *MI, SmallVector & DbgValues) { DbgValues.clear(); if (!MI->getOperand(0).isReg()) @@ -401,6 +402,165 @@ static void collectDebugValues(MachineInstr *MI, } } +/// isPostDominatedBy - Return true if A is post dominated by B. +static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { + + // FIXME - Use real post dominator. + if (A->succ_size() != 2) + return false; + MachineBasicBlock::succ_iterator I = A->succ_begin(); + if (B == *I) + ++I; + MachineBasicBlock *OtherSuccBlock = *I; + if (OtherSuccBlock->succ_size() != 1 || + *(OtherSuccBlock->succ_begin()) != B) + return false; + + return true; +} + +/// isProfitableToSinkTo - Return true if it is profitable to sink MI. +bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo) { + assert (MI && "Invalid MachineInstr!"); + assert (SuccToSinkTo && "Invalid SinkTo Candidate BB"); + + if (MBB == SuccToSinkTo) + return false; + + // It is profitable if SuccToSinkTo does not post dominate current block. + if (!isPostDominatedBy(MBB, SuccToSinkTo)) + return true; + + // Check if only use in post dominated block is PHI instruction. + bool NonPHIUse = false; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + NonPHIUse = true; + } + if (!NonPHIUse) + return true; + + // If SuccToSinkTo post dominates then also it may be profitable if MI + // can further profitably sinked into another block in next round. + bool BreakPHIEdge = false; + // FIXME - If finding successor is compile time expensive then catch results. + if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) + return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); + + // If SuccToSinkTo is final destination and it is a post dominator of current + // block then it is not profitable to sink MI into SuccToSinkTo block. + return false; +} + +/// FindSuccToSinkTo - Find a successor to sink this instruction to. +MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, + MachineBasicBlock *MBB, + bool &BreakPHIEdge) { + + assert (MI && "Invalid MachineInstr!"); + assert (MBB && "Invalid MachineBasicBlock!"); + + // Loop over all the operands of the specified instruction. If there is + // anything we can't handle, bail out. 
+ + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + MachineBasicBlock *SuccToSinkTo = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; // Ignore non-register operands. + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + return NULL; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return NULL; + } + } else { + // Virtual register uses are always safe to sink. + if (MO.isUse()) continue; + + // If it's not safe to move defs of the register class, then abort. + if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) + return NULL; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // + // x = computation + // if () {} else {} + // use x + // + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Virtual register defs can only be sunk if all their uses are in blocks + // dominated by one of the successors. + if (SuccToSinkTo) { + // If a previous operand picked a block to sink to, then this operand + // must be sinkable to the same block. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, + BreakPHIEdge, LocalUse)) + return NULL; + + continue; + } + + // Otherwise, we should look at all the successors and decide which one + // we should sink to. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBlock = *SI; + bool LocalUse = false; + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, + BreakPHIEdge, LocalUse)) { + SuccToSinkTo = SuccBlock; + break; + } + if (LocalUse) + // Def is used locally, it's never safe to move this def. + return NULL; + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (SuccToSinkTo == 0) + return NULL; + else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return NULL; + } + } + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MBB == SuccToSinkTo) + return NULL; + + // It's not safe to sink instructions to EH landing pad. Control flow into + // landing pad is implicitly defined. + if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + return NULL; + + return SuccToSinkTo; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { @@ -421,114 +581,14 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. - // Loop over all the operands of the specified instruction. If there is - // anything we can't handle, bail out. 
- MachineBasicBlock *ParentBlock = MI->getParent(); - - // SuccToSinkTo - This is the successor to sink this instruction to, once we - // decide. - MachineBasicBlock *SuccToSinkTo = 0; - bool BreakPHIEdge = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; // Ignore non-register operands. - - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (MO.isUse()) { - // If the physreg has no defs anywhere, it's just an ambient register - // and we can freely move its uses. Alternatively, if it's allocatable, - // it could get allocated to something with a def during allocation. - if (!MRI->def_empty(Reg)) - return false; - - if (AllocatableSet.test(Reg)) - return false; - - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI->def_empty(AliasReg)) - return false; - - if (AllocatableSet.test(AliasReg)) - return false; - } - } else if (!MO.isDead()) { - // A def that isn't dead. We can't move it. - return false; - } - } else { - // Virtual register uses are always safe to sink. - if (MO.isUse()) continue; - - // If it's not safe to move defs of the register class, then abort. - if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) - return false; - - // FIXME: This picks a successor to sink into based on having one - // successor that dominates all the uses. However, there are cases where - // sinking can happen but where the sink point isn't a successor. For - // example: - // - // x = computation - // if () {} else {} - // use x - // - // the instruction could be sunk over the whole diamond for the - // if/then/else (or loop, etc), allowing it to be sunk into other blocks - // after that. - - // Virtual register defs can only be sunk if all their uses are in blocks - // dominated by one of the successors. - if (SuccToSinkTo) { - // If a previous operand picked a block to sink to, then this operand - // must be sinkable to the same block. - bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, - BreakPHIEdge, LocalUse)) - return false; - - continue; - } - - // Otherwise, we should look at all the successors and decide which one - // we should sink to. - for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), - E = ParentBlock->succ_end(); SI != E; ++SI) { - bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, - BreakPHIEdge, LocalUse)) { - SuccToSinkTo = *SI; - break; - } - if (LocalUse) - // Def is used locally, it's never safe to move this def. - return false; - } - - // If we couldn't find a block to sink to, ignore this instruction. - if (SuccToSinkTo == 0) - return false; - } - } + MachineBasicBlock *ParentBlock = MI->getParent(); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); // If there are no outputs, it must have side-effects. if (SuccToSinkTo == 0) return false; - // It's not safe to sink instructions to EH landing pad. Control flow into - // landing pad is implicitly defined. - if (SuccToSinkTo->isLandingPad()) - return false; - - // It is not possible to sink an instruction into its own block. This can - // happen with loops. 
- if (MI->getParent() == SuccToSinkTo) - return false; // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 26847d39e7ad..74ba94d1fcc0 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -69,14 +70,17 @@ namespace { unsigned foundErrors; typedef SmallVector RegVector; + typedef SmallVector RegMaskVector; typedef DenseSet RegSet; typedef DenseMap RegMap; const MachineInstr *FirstTerminator; BitVector regsReserved; + BitVector regsAllocatable; RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; + RegMaskVector regMasks; RegSet regsLiveInButUnused; SlotIndex lastIndex; @@ -85,7 +89,7 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) RV.push_back(*R); } @@ -175,6 +179,10 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } + bool isAllocatable(unsigned Reg) { + return Reg < regsAllocatable.size() && regsAllocatable.test(Reg); + } + // Analysis information if available LiveVariables *LiveVars; LiveIntervals *LiveInts; @@ -194,6 +202,7 @@ namespace { void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); @@ -279,13 +288,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); - for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), - MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); *OS << "Instruction: " << *MBBI; continue; } + // Skip BUNDLE instruction for now. FIXME: We should add code to verify + // the BUNDLE's specifically. 
+ if (MBBI->isBundle()) + continue; visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); @@ -305,6 +318,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { regsDefined.clear(); regsDead.clear(); regsKilled.clear(); + regMasks.clear(); regsLiveInButUnused.clear(); MBBInfoMap.clear(); @@ -320,7 +334,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getNameStr() << "\n"; + << "- function: " << MF->getFunction()->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -370,12 +384,15 @@ void MachineVerifier::visitMachineFunctionBefore() { // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; Reg = regsReserved.find_next(Reg)) { - for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { // FIXME: This should probably be: // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); regsReserved.set(*Sub); } } + + regsAllocatable = TRI->getAllocatableSet(*MF); + markReachable(&MF->front()); } @@ -393,6 +410,20 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = 0; + if (MRI->isSSA()) { + // If this block has allocatable physical registers live-in, check that + // it is an entry block or landing pad. + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); + LI != LE; ++LI) { + unsigned reg = *LI; + if (isAllocatable(reg) && !MBB->isLandingPad() && + MBB != MBB->getParent()->begin()) { + report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); + } + } + } + // Count the number of landing pad successors. 
SmallPtrSet LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), @@ -435,7 +466,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + if (!MBB->empty() && MBB->back().isBarrier() && !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); @@ -456,10 +487,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -479,10 +510,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().getDesc().isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -499,10 +530,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -523,7 +554,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { continue; } regsLive.insert(*I); - for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } regsLiveInButUnused = regsLive; @@ -533,7 +564,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { regsLive.insert(I); - for (const unsigned *R = TRI->getSubRegisters(I); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++) regsLive.insert(*R); } @@ -555,19 +586,22 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Check the MachineMemOperands for basic consistency. 
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !MCID.mayLoad()) + if ((*I)->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MCID.mayStore()) + if ((*I)->isStore() && !MI->mayStore()) report("Missing mayStore flag", MI); } // Debug values must not have a slot index. - // Other instructions must have one. + // Other instructions must have one, unless they are inside a bundle. if (LiveInts) { bool mapped = !LiveInts->isNotInMIMap(MI); if (MI->isDebugValue()) { if (mapped) report("Debug instruction has a slot index", MI); + } else if (MI->isInsideBundle()) { + if (mapped) + report("Instruction inside bundle has a slot index", MI); } else { if (!mapped) report("Missing slot index", MI); @@ -575,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - if (MCID.isTerminator()) { + // Ignore predicated terminators formed by if conversion. + // FIXME: If conversion shouldn't need to violate this rule. + if (MI->isTerminator() && !TII->isPredicated(MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -606,7 +642,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. if (MO->isReg() && - !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) @@ -614,7 +650,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -623,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const unsigned Reg = MO->getReg(); if (!Reg) return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); - // Check Live Variables. - if (MI->isDebugValue()) { - // Liveness checks are not valid for debug values. - } else if (MO->isUse() && !MO->isUndef()) { - regsLiveInButUnused.erase(Reg); - - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", - MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) - addRegWithSubRegs(regsKilled, Reg); - - // Check that LiveVars knows this kill. - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); - } - - // Check LiveInts liveness and kill. 
- if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; - } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - - // Use of a dead register. - if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) - report("Using an undefined physical register", MO, MONum); - } else { - BBInfo &MInfo = MBBInfoMap[MI->getParent()]; - // We don't know which virtual registers are live in, so only complain - // if vreg was killed in this MBB. Otherwise keep track of vregs that - // must be live in. PHI instructions are handled separately. - if (MInfo.regsKilled.count(Reg)) - report("Using a killed virtual register", MO, MONum); - else if (!MI->isPHI()) - MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); - } - } - } else if (MO->isDef()) { - // Register defined. - // TODO: verify that earlyclobber ops are not used. - if (MO->isDead()) - addRegWithSubRegs(regsDead, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - - // Verify SSA form. - if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) - report("Multiple virtual register defs in SSA form", MO, MONum); - - // Check LiveInts for a live range, but only for virtual registers. - if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && - !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { - assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { - report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " - << DefIdx << " in " << LI << '\n'; - } - } else { - report("No live range at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - } // Check register classes. 
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -790,6 +723,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { break; } + case MachineOperand::MO_RegisterMask: + regMasks.push_back(MO->getRegMask()); + break; + case MachineOperand::MO_MachineBasicBlock: if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) report("PHI operand is not in the CFG", MO, MONum); @@ -800,11 +737,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } @@ -816,10 +753,127 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + bool isKill = false; + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { + // A two-addr use counts as a kill if use and def are the same. + unsigned DefReg = MI->getOperand(defIdx).getReg(); + if (Reg == DefReg) + isKill = true; + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + report("Two-address instruction operands must be identical", MO, MONum); + } + } else + isKill = MO->isKill(); + + if (isKill) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + + // Use of a dead register. + if (!regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + if (!isReserved(Reg)) + report("Using an undefined physical register", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. 
PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (!MI->isPHI()) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } + + if (MO->isDef()) { + // Register defined. + // TODO: verify that earlyclobber ops are not used. + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + // Verify SSA form. + if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << VNI->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } +} + void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); + // Kill any masked registers. + while (!regMasks.empty()) { + const uint32_t *Mask = regMasks.pop_back_val(); + for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) + if (TargetRegisterInfo::isPhysicalRegister(*I) && + MachineOperand::clobbersPhysReg(Mask, *I)) + regsDead.push_back(*I); + } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); @@ -855,7 +909,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - DenseSet todo; + SmallPtrSet todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -892,7 +946,7 @@ void MachineVerifier::calcRegsPassed() { // similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. - DenseSet todo; + SmallPtrSet todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -925,9 +979,10 @@ void MachineVerifier::calcRegsRequired() { // Check PHI instructions at the beginning of MBB. It is assumed that // calcRegsPassed has been run so BBInfo::isLiveOut is valid. 
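// Illustrative sketch (not part of this patch): the new regMasks handling
// above assumes the usual register-mask encoding, where the mask carries one
// bit per physical register and a *set* bit means the register is preserved
// across the instruction (typically a call). The standalone helper below
// shows that assumed bit test on plain integers; it is a simplified model of
// MachineOperand::clobbersPhysReg(), not the verifier's own code.
#include <cstdint>

static bool clobbersPhysRegSketch(const uint32_t *Mask, unsigned Reg) {
  // A clear bit means the instruction clobbers this register.
  return !(Mask[Reg / 32] & (1u << (Reg % 32)));
}
// Example: a single mask word 0x00000005 preserves registers 0 and 2 and
// clobbers the remaining registers numbered 0-31.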
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet seen; for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) { - DenseSet seen; + seen.clear(); for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); @@ -968,8 +1023,17 @@ void MachineVerifier::visitMachineFunctionAfter() { } // Now check liveness info if available - if (LiveVars || LiveInts) - calcRegsRequired(); + calcRegsRequired(); + + if (MRI->isSSA() && !MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + if (LiveVars) verifyLiveVariables(); if (LiveInts) @@ -1065,33 +1129,43 @@ void MachineVerifier::verifyLiveIntervals() { report("No instruction at def index", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; - } else if (!MI->modifiesRegister(LI.reg, TRI)) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + continue; } + bool hasDef = false; bool isEarlyClobber = false; - if (MI) { - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && - MOI->isEarlyClobber()) { - isEarlyClobber = true; - break; - } + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->regsOverlap(LI.reg, MOI->getReg())) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } + + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; } // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. if (isEarlyClobber) { - if (!VNI->def.isUse()) { - report("Early clobber def must be at a USE slot", MF); + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } - } else if (!VNI->def.isDef()) { - report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } @@ -1137,32 +1211,76 @@ void MachineVerifier::verifyLiveIntervals() { *OS << " in " << LI << '\n'; continue; } - if (I->end != LiveInts->getMBBEndIdx(EndMBB)) { - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); + + // No more checks for live-out segments. 
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + continue; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB); I->print(*OS); *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) && - !MI->readsVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - // FIXME: Should we check for each of these? - bool hasDeadDef = false; - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) { - hasDeadDef = true; - break; - } - } + << MBBStartIdx << '\n'; + continue; + } + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == E || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) + continue; + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } + + if (I->end.isDead()) { if (!hasDeadDef) { - report("Instruction killing live segment neither defines nor reads " - "register", MI); + report("Instruction doesn't have a dead def operand", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", + MI); I->print(*OS); *OS << " in " << LI << '\n'; } @@ -1192,8 +1310,8 @@ void MachineVerifier::verifyLiveIntervals() { // Check that VNI is live-out of all predecessors. 
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); - const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) continue; @@ -1201,7 +1319,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!PVNI) { report("Register not marked live out of predecessor", *PI); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << " in " << LI << '\n'; continue; } diff --git a/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp b/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp deleted file mode 100644 index cf05275d7a31..000000000000 --- a/contrib/llvm/lib/CodeGen/ObjectCodeEmitter.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" - -//===----------------------------------------------------------------------===// -// ObjectCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - -ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {} -ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {} -ObjectCodeEmitter::~ObjectCodeEmitter() {} - -/// setBinaryObject - set the BinaryObject we are writting to -void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; } - -/// emitByte - This callback is invoked when a byte needs to be -/// written to the data stream, without buffer overflow testing. -void ObjectCodeEmitter::emitByte(uint8_t B) { - BO->emitByte(B); -} - -/// emitWordLE - This callback is invoked when a 32-bit word needs to be -/// written to the data stream in little-endian format. -void ObjectCodeEmitter::emitWordLE(uint32_t W) { - BO->emitWordLE(W); -} - -/// emitWordBE - This callback is invoked when a 32-bit word needs to be -/// written to the data stream in big-endian format. -void ObjectCodeEmitter::emitWordBE(uint32_t W) { - BO->emitWordBE(W); -} - -/// emitDWordLE - This callback is invoked when a 64-bit word needs to be -/// written to the data stream in little-endian format. -void ObjectCodeEmitter::emitDWordLE(uint64_t W) { - BO->emitDWordLE(W); -} - -/// emitDWordBE - This callback is invoked when a 64-bit word needs to be -/// written to the data stream in big-endian format. -void ObjectCodeEmitter::emitDWordBE(uint64_t W) { - BO->emitDWordBE(W); -} - -/// emitAlignment - Align 'BO' to the necessary alignment boundary. -void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */, - uint8_t fill /* 0 */) { - BO->emitAlignment(Alignment, fill); -} - -/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be -/// written to the data stream. 
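// Illustrative sketch (not part of this patch): the removed callback above
// just forwarded ULEB128 emission to the BinaryObject. For reference, the
// standard ULEB128 encoding such a callback produces is seven value bits per
// byte, least-significant group first, with the high bit set on every byte
// except the last. A standalone encoder, not the deleted implementation:
#include <cstdint>
#include <vector>

static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;     // low seven bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                  // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}
// For example, encodeULEB128(624485, Out) appends the bytes 0xE5 0x8E 0x26.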
-void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) { - BO->emitULEB128Bytes(Value); -} - -/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be -/// written to the data stream. -void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { - BO->emitSLEB128Bytes(Value); -} - -/// emitString - This callback is invoked when a String needs to be -/// written to the data stream. -void ObjectCodeEmitter::emitString(const std::string &String) { - BO->emitString(String); -} - -/// getCurrentPCValue - This returns the address that the next emitted byte -/// will be output to. -uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { - return BO->getCurrentPCOffset(); -} - -/// getCurrentPCOffset - Return the offset from the start of the emitted -/// buffer that we are currently writing to. -uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { - return BO->getCurrentPCOffset(); -} - -/// addRelocation - Whenever a relocatable address is needed, it should be -/// noted with this interface. -void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { - BO->addRelocation(relocation); -} - -/// StartMachineBasicBlock - This should be called by the target when a new -/// basic block is about to be emitted. This way the MCE knows where the -/// start of the block is, and can implement getMachineBasicBlockAddress. -void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); -} - -/// getMachineBasicBlockAddress - Return the address of the specified -/// MachineBasicBlock, only usable after the label for the MBB has been -/// emitted. -uintptr_t -ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; -} - -/// getJumpTableEntryAddress - Return the address of the jump table with index -/// 'Index' in the function that last called initJumpTableInfo. -uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; -} - -/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in -/// the constant pool that was last emitted with the emitConstantPool method. -uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; -} - -/// getConstantPoolEntrySection - Return the section of the 'Index' entry in -/// the constant pool that was last emitted with the emitConstantPool method. 
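// Illustrative sketch (not part of this patch): the deleted
// getMachineBasicBlockAddress / getJumpTableEntryAddress /
// getConstantPoolEntryAddress accessors all share one bookkeeping pattern --
// a vector keyed by an entry number, grown on demand while emitting and
// asserted to be populated when queried. A stripped-down standalone version
// of that pattern (names and the simpler resize policy are illustrative):
#include <cassert>
#include <cstdint>
#include <vector>

struct OffsetTable {
  std::vector<uintptr_t> Locations;

  void record(unsigned Number, uintptr_t CurrentOffset) {
    if (Locations.size() <= Number)
      Locations.resize(Number + 1, 0);
    Locations[Number] = CurrentOffset;
  }

  uintptr_t lookup(unsigned Number) const {
    assert(Number < Locations.size() && Locations[Number] && "not emitted!");
    return Locations[Number];
  }
};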
-uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { - assert(CPSections.size() > Index && "CP not emitted!"); - return CPSections[Index]; -} - -} // end namespace llvm - diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index c05be130ec61..6da313e632af 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -56,11 +56,10 @@ namespace { } char OptimizePHIs::ID = 0; +char &llvm::OptimizePHIsID = OptimizePHIs::ID; INITIALIZE_PASS(OptimizePHIs, "opt-phis", "Optimize machine instruction PHIs", false, false) -FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } - bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); TII = Fn.getTarget().getInstrInfo(); @@ -165,7 +164,11 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { InstrSet PHIsInCycle; if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && SingleValReg != 0) { - MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + unsigned OldReg = MI->getOperand(0).getReg(); + if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) + continue; + + MRI->replaceRegWith(OldReg, SingleValReg); MI->eraseFromParent(); ++NumPHICycles; Changed = true; diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index 6994aa58fbd5..0ed4c34bb105 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -92,11 +92,15 @@ STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; -INITIALIZE_PASS(PHIElimination, "phi-node-elimination", - "Eliminate PHI nodes for register allocation", false, false) - char& llvm::PHIEliminationID = PHIElimination::ID; +INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false) + void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); @@ -241,7 +245,6 @@ void PHIElimination::LowerAtomicPHINode( LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); // Increment use count of the newly created virtual register. - VI.NumUses++; LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been @@ -410,7 +413,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, return false; // Quick exit for basic blocks without PHIs. 
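// Illustrative sketch (not part of this patch): checkPHIOps earlier and
// SplitPHIEdges just below both walk PHI operands with the same stride-two
// loop, because a machine PHI lays its operands out as
// [def, value1, pred1, value2, pred2, ...]. A simplified standalone model of
// that layout and walk (the types here are illustrative, not LLVM's):
#include <cstddef>
#include <vector>

struct PhiOperand {
  unsigned Reg;      // incoming value register (odd slots)
  int PredBlock;     // predecessor block number (the following even slots)
};

static void visitIncoming(const std::vector<PhiOperand> &Ops) {
  // Ops[0] is the defined register; (value, predecessor) pairs start at 1.
  for (std::size_t i = 1; i + 1 < Ops.size(); i += 2) {
    unsigned Reg = Ops[i].Reg;            // value flowing in
    int Pred = Ops[i + 1].PredBlock;      // block it flows in from
    (void)Reg; (void)Pred;                // a real pass checks or splits here
  }
}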
bool Changed = false; - for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); + for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index 315aedddb9ef..53d1fcf7377a 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -12,62 +12,617 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -//===---------------------------------------------------------------------===// +static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc")); +static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, + cl::desc("Disable branch folding")); +static cl::opt DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); +static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden, + cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt EnableBlockPlacement("enable-block-placement", + cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt EnableBlockPlacementStats("enable-block-placement-stats", + cl::Hidden, cl::desc("Collect probability-driven block placement stats")); +static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, + cl::desc("Disable code placement")); +static cl::opt DisableSSC("disable-ssc", cl::Hidden, + cl::desc("Disable Stack Slot Coloring")); +static cl::opt DisableMachineDCE("disable-machine-dce", cl::Hidden, + cl::desc("Disable Machine Dead Code Elimination")); +static cl::opt DisableMachineLICM("disable-machine-licm", cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt DisableMachineCSE("disable-machine-cse", cl::Hidden, + cl::desc("Disable Machine Common Subexpression Elimination")); +static cl::opt +OptimizeRegAlloc("optimize-regalloc", cl::Hidden, + cl::desc("Enable optimized register allocation compilation path.")); +static cl::opt +EnableMachineSched("enable-misched", cl::Hidden, + cl::desc("Enable the machine instruction scheduling pass.")); +static cl::opt EnableStrongPHIElim("strong-phi-elim", cl::Hidden, + cl::desc("Use strong PHI elimination.")); +static cl::opt DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt DisableMachineSink("disable-machine-sink", cl::Hidden, + cl::desc("Disable Machine Sinking")); +static cl::opt DisableLSR("disable-lsr", cl::Hidden, + cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt DisableCGP("disable-cgp", cl::Hidden, + cl::desc("Disable Codegen Prepare")); +static cl::opt DisableCopyProp("disable-copyprop", cl::Hidden, + cl::desc("Disable Copy Propagation 
pass")); +static cl::opt PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + +/// Allow standard passes to be disabled by command line options. This supports +/// simple binary flags that either suppress the pass or do nothing. +/// i.e. -disable-mypass=false has no effect. +/// These should be converted to boolOrDefault in order to use applyOverride. +static AnalysisID applyDisable(AnalysisID ID, bool Override) { + if (Override) + return &NoPassID; + return ID; +} + +/// Allow Pass selection to be overriden by command line options. This supports +/// flags with ternary conditions. TargetID is passed through by default. The +/// pass is suppressed when the option is false. When the option is true, the +/// StandardID is selected if the target provides no default. +static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, + AnalysisID StandardID) { + switch (Override) { + case cl::BOU_UNSET: + return TargetID; + case cl::BOU_TRUE: + if (TargetID != &NoPassID) + return TargetID; + if (StandardID == &NoPassID) + report_fatal_error("Target cannot enable pass"); + return StandardID; + case cl::BOU_FALSE: + return &NoPassID; + } + llvm_unreachable("Invalid command line option state"); +} + +/// Allow standard passes to be disabled by the command line, regardless of who +/// is adding the pass. /// -/// RegisterRegAlloc class - Track the registration of register allocators. +/// StandardID is the pass identified in the standard pass pipeline and provided +/// to addPass(). It may be a target-specific ID in the case that the target +/// directly adds its own pass, but in that case we harmlessly fall through. /// +/// TargetID is the pass that the target has configured to override StandardID. +/// +/// StandardID may be a pseudo ID. In that case TargetID is the name of the real +/// pass to run. This allows multiple options to control a single pass depending +/// on where in the pipeline that pass is added. 
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { + if (StandardID == &PostRASchedulerID) + return applyDisable(TargetID, DisablePostRA); + + if (StandardID == &BranchFolderPassID) + return applyDisable(TargetID, DisableBranchFold); + + if (StandardID == &TailDuplicateID) + return applyDisable(TargetID, DisableTailDuplicate); + + if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) + return applyDisable(TargetID, DisableEarlyTailDup); + + if (StandardID == &MachineBlockPlacementID) + return applyDisable(TargetID, DisableCodePlace); + + if (StandardID == &CodePlacementOptID) + return applyDisable(TargetID, DisableCodePlace); + + if (StandardID == &StackSlotColoringID) + return applyDisable(TargetID, DisableSSC); + + if (StandardID == &DeadMachineInstructionElimID) + return applyDisable(TargetID, DisableMachineDCE); + + if (StandardID == &MachineLICMID) + return applyDisable(TargetID, DisableMachineLICM); + + if (StandardID == &MachineCSEID) + return applyDisable(TargetID, DisableMachineCSE); + + if (StandardID == &MachineSchedulerID) + return applyOverride(TargetID, EnableMachineSched, StandardID); + + if (StandardID == &TargetPassConfig::PostRAMachineLICMID) + return applyDisable(TargetID, DisablePostRAMachineLICM); + + if (StandardID == &MachineSinkingID) + return applyDisable(TargetID, DisableMachineSink); + + if (StandardID == &MachineCopyPropagationID) + return applyDisable(TargetID, DisableCopyProp); + + return TargetID; +} + //===---------------------------------------------------------------------===// +/// TargetPassConfig +//===---------------------------------------------------------------------===// + +INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", + "Target Pass Configuration", false, false) +char TargetPassConfig::ID = 0; + +static char NoPassIDAnchor = 0; +char &llvm::NoPassID = NoPassIDAnchor; + +// Pseudo Pass IDs. +char TargetPassConfig::EarlyTailDuplicateID = 0; +char TargetPassConfig::PostRAMachineLICMID = 0; + +namespace llvm { +class PassConfigImpl { +public: + // List of passes explicitly substituted by this target. Normally this is + // empty, but it is a convenient way to suppress or replace specific passes + // that are part of a standard pass pipeline without overridding the entire + // pipeline. This mechanism allows target options to inherit a standard pass's + // user interface. For example, a target may disable a standard pass by + // default by substituting NoPass, and the user may still enable that standard + // pass with an explicit command line option. + DenseMap TargetPasses; +}; +} // namespace llvm + +// Out of line virtual method. +TargetPassConfig::~TargetPassConfig() { + delete Impl; +} + +// Out of line constructor provides default values for pass options and +// registers all common codegen passes. +TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) + : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false), + DisableVerify(false), + EnableTailMerge(true) { + + Impl = new PassConfigImpl(); + + // Register all target independent codegen passes to activate their PassIDs, + // including this pass itself. + initializeCodeGen(*PassRegistry::getPassRegistry()); + + // Substitute Pseudo Pass IDs for real ones. + substitutePass(EarlyTailDuplicateID, TailDuplicateID); + substitutePass(PostRAMachineLICMID, MachineLICMID); + + // Temporarily disable experimental passes. 
+ substitutePass(MachineSchedulerID, NoPassID); +} + +/// createPassConfig - Create a pass configuration object to be used by +/// addPassToEmitX methods for generating a pipeline of CodeGen passes. +/// +/// Targets may override this to extend TargetPassConfig. +TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { + return new TargetPassConfig(this, PM); +} + +TargetPassConfig::TargetPassConfig() + : ImmutablePass(ID), PM(*(PassManagerBase*)0) { + llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); +} + +// Helper to verify the analysis is really immutable. +void TargetPassConfig::setOpt(bool &Opt, bool Val) { + assert(!Initialized && "PassConfig is immutable"); + Opt = Val; +} + +void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) { + Impl->TargetPasses[&StandardID] = &TargetID; +} + +AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap::const_iterator + I = Impl->TargetPasses.find(ID); + if (I == Impl->TargetPasses.end()) + return ID; + return I->second; +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +AnalysisID TargetPassConfig::addPass(char &ID) { + assert(!Initialized && "PassConfig is immutable"); + + AnalysisID TargetID = getPassSubstitution(&ID); + AnalysisID FinalID = overridePass(&ID, TargetID); + if (FinalID == &NoPassID) + return FinalID; + + Pass *P = Pass::createPass(FinalID); + if (!P) + llvm_unreachable("Pass ID not registered"); + PM.add(P); + return FinalID; +} + +void TargetPassConfig::printAndVerify(const char *Banner) const { + if (TM->shouldPrintMachineCode()) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + if (VerifyMachineCode) + PM.add(createMachineVerifierPass(Banner)); +} + +/// Add common target configurable passes that perform LLVM IR to IR transforms +/// following machine independent optimization. +void TargetPassConfig::addIRPasses() { + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + if (PrintLSR) + PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); +} + +/// Add common passes that perform LLVM IR to IR transforms in preparation for +/// instruction selection. +void TargetPassConfig::addISelPrepare() { + if (getOptLevel() != CodeGenOpt::None && !DisableCGP) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. 
+ if (!DisableVerify) + PM.add(createVerifierPass()); +} + +/// Add the complete set of target-independent postISel code generator passes. +/// +/// This can be read as the standard order of major LLVM CodeGen stages. Stages +/// with nontrivial configuration or multiple passes are broken out below in +/// add%Stage routines. +/// +/// Any TargetPassConfig::addXX routine may be overriden by the Target. The +/// addPre/Post methods with empty header implementations allow injecting +/// target-specific fixups just before or after major stages. Additionally, +/// targets have the flexibility to change pass order within a stage by +/// overriding default implementation of add%Stage routines below. Each +/// technique has maintainability tradeoffs because alternate pass orders are +/// not well supported. addPre/Post works better if the target pass is easily +/// tied to a common pass. But if it has subtle dependencies on multiple passes, +/// the target should override the stage instead. +/// +/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection +/// before/after any target-independent pass. But it's currently overkill. +void TargetPassConfig::addMachinePasses() { + // Print the instruction selected machine code... + printAndVerify("After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + addPass(ExpandISelPseudosID); + + // Add passes that optimize machine instructions in SSA form. + if (getOptLevel() != CodeGenOpt::None) { + addMachineSSAOptimization(); + } + else { + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotAllocationID); + } + + // Run pre-ra passes. + if (addPreRegAlloc()) + printAndVerify("After PreRegAlloc passes"); + + // Run register allocation and passes that are tightly coupled with it, + // including phi elimination and scheduling. + if (getOptimizeRegAlloc()) + addOptimizedRegAlloc(createRegAllocPass(true)); + else + addFastRegAlloc(createRegAllocPass(false)); + + // Run post-ra passes. + if (addPostRegAlloc()) + printAndVerify("After PostRegAlloc passes"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + addPass(PrologEpilogCodeInserterID); + printAndVerify("After PrologEpilogCodeInserter"); + + /// Add passes that optimize machine instructions after register allocation. + if (getOptLevel() != CodeGenOpt::None) + addMachineLateOptimization(); + + // Expand pseudo instructions before second scheduling pass. + addPass(ExpandPostRAPseudosID); + printAndVerify("After ExpandPostRAPseudos"); + + // Run pre-sched2 passes. + if (addPreSched2()) + printAndVerify("After PreSched2 passes"); + + // Second pass scheduler. + if (getOptLevel() != CodeGenOpt::None) { + addPass(PostRASchedulerID); + printAndVerify("After PostRAScheduler"); + } + + // GC + addPass(GCMachineCodeAnalysisID); + if (PrintGCInfo) + PM.add(createGCInfoPrinter(dbgs())); + + // Basic block placement. + if (getOptLevel() != CodeGenOpt::None) + addBlockPlacement(); + + if (addPreEmitPass()) + printAndVerify("After PreEmit passes"); +} + +/// Add passes that optimize machine instructions in SSA form. +void TargetPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. + if (addPass(EarlyTailDuplicateID) != &NoPassID) + printAndVerify("After Pre-RegAlloc TailDuplicate"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. 
+ addPass(OptimizePHIsID); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotAllocationID); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(DeadMachineInstructionElimID); + printAndVerify("After codegen DCE pass"); + + addPass(MachineLICMID); + addPass(MachineCSEID); + addPass(MachineSinkingID); + printAndVerify("After Machine LICM, CSE and Sinking passes"); + + addPass(PeepholeOptimizerID); + printAndVerify("After codegen peephole optimization pass"); +} + +//===---------------------------------------------------------------------===// +/// Register Allocation Pass Configuration +//===---------------------------------------------------------------------===// + +bool TargetPassConfig::getOptimizeRegAlloc() const { + switch (OptimizeRegAlloc) { + case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None; + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } + llvm_unreachable("Invalid optimize-regalloc state"); +} + +/// RegisterRegAlloc's global Registry tracks allocator registration. MachinePassRegistry RegisterRegAlloc::Registry; -static FunctionPass *createDefaultRegisterAllocator() { return 0; } +/// A dummy default pass factory indicates whether the register allocator is +/// overridden on the command line. +static FunctionPass *useDefaultRegisterAllocator() { return 0; } static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", - createDefaultRegisterAllocator); + useDefaultRegisterAllocator); -//===---------------------------------------------------------------------===// -/// -/// RegAlloc command line options. -/// -//===---------------------------------------------------------------------===// +/// -regalloc=... command line option. static cl::opt > RegAlloc("regalloc", - cl::init(&createDefaultRegisterAllocator), + cl::init(&useDefaultRegisterAllocator), cl::desc("Register allocator to use")); -//===---------------------------------------------------------------------===// +/// Instantiate the default register allocator pass for this target for either +/// the optimized or unoptimized allocation path. This will be added to the pass +/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc +/// in the optimized case. /// -/// createRegisterAllocator - choose the appropriate register allocator. +/// A target that uses the standard regalloc pass order for fast or optimized +/// allocation may still override this for per-target regalloc +/// selection. But -regalloc=... always takes precedence. +FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) { + if (Optimized) + return createGreedyRegisterAllocator(); + else + return createFastRegisterAllocator(); +} + +/// Find and instantiate the register allocation pass requested by this target +/// at the current optimization level. Different register allocators are +/// defined as separate passes because they may require different analysis. 
/// -//===---------------------------------------------------------------------===// -FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { +/// This helper ensures that the regalloc= option is always available, +/// even for targets that override the default allocator. +/// +/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs, +/// this can be folded into addPass. +FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + // Initialize the global default. if (!Ctor) { Ctor = RegAlloc; RegisterRegAlloc::setDefault(RegAlloc); } - - // This forces linking of the linear scan register allocator, - // so -regalloc=linearscan still works in clang. - if (Ctor == createLinearScanRegisterAllocator) - return createLinearScanRegisterAllocator(); - - if (Ctor != createDefaultRegisterAllocator) + if (Ctor != useDefaultRegisterAllocator) return Ctor(); - // When the 'default' allocator is requested, pick one based on OptLevel. - switch (OptLevel) { - case CodeGenOpt::None: - return createFastRegisterAllocator(); - default: - return createGreedyRegisterAllocator(); + // With no -regalloc= override, ask the target for a regalloc pass. + return createTargetRegisterAllocator(Optimized); +} + +/// Add the minimum set of target-independent passes that are required for +/// register allocation. No coalescing or scheduling. +void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(PHIEliminationID); + addPass(TwoAddressInstructionPassID); + + PM.add(RegAllocPass); + printAndVerify("After Register Allocation"); +} + +/// Add standard target-independent passes that are tightly coupled with +/// optimized register allocation, including coalescing, machine instruction +/// scheduling, and register allocation itself. +void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + // LiveVariables currently requires pure SSA form. + // + // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, + // LiveVariables can be removed completely, and LiveIntervals can be directly + // computed. (We still either need to regenerate kill flags after regalloc, or + // preferably fix the scavenger to not depend on them). + addPass(LiveVariablesID); + + // Add passes that move from transformed SSA into conventional SSA. This is a + // "copy coalescing" problem. + // + if (!EnableStrongPHIElim) { + // Edge splitting is smarter with machine loop info. + addPass(MachineLoopInfoID); + addPass(PHIEliminationID); + } + addPass(TwoAddressInstructionPassID); + + // FIXME: Either remove this pass completely, or fix it so that it works on + // SSA form. We could modify LiveIntervals to be independent of this pass, But + // it would be even better to simply eliminate *all* IMPLICIT_DEFs before + // leaving SSA. + addPass(ProcessImplicitDefsID); + + if (EnableStrongPHIElim) + addPass(StrongPHIEliminationID); + + addPass(RegisterCoalescerID); + + // PreRA instruction scheduling. + if (addPass(MachineSchedulerID) != &NoPassID) + printAndVerify("After Machine Scheduling"); + + // Add the selected register allocation pass. + PM.add(RegAllocPass); + printAndVerify("After Register Allocation"); + + // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, + // but eventually, all users of it should probably be moved to addPostRA and + // it can go away. 
Currently, it's the intended place for targets to run + // FinalizeMachineBundles, because passes other than MachineScheduling an + // RegAlloc itself may not be aware of bundles. + if (addFinalizeRegAlloc()) + printAndVerify("After RegAlloc finalization"); + + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(StackSlotColoringID); + + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(PostRAMachineLICMID); + + printAndVerify("After StackSlotColoring and postra Machine LICM"); +} + +//===---------------------------------------------------------------------===// +/// Post RegAlloc Pass Configuration +//===---------------------------------------------------------------------===// + +/// Add passes that optimize machine instructions after register allocation. +void TargetPassConfig::addMachineLateOptimization() { + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (addPass(BranchFolderPassID) != &NoPassID) + printAndVerify("After BranchFolding"); + + // Tail duplication. + if (addPass(TailDuplicateID) != &NoPassID) + printAndVerify("After TailDuplicate"); + + // Copy propagation. + if (addPass(MachineCopyPropagationID) != &NoPassID) + printAndVerify("After copy propagation pass"); +} + +/// Add standard basic block placement passes. +void TargetPassConfig::addBlockPlacement() { + AnalysisID ID = &NoPassID; + if (EnableBlockPlacement) { + // MachineBlockPlacement is an experimental pass which is disabled by + // default currently. Eventually it should subsume CodePlacementOpt, so + // when enabled, the other is disabled. + ID = addPass(MachineBlockPlacementID); + } else { + ID = addPass(CodePlacementOptID); + } + if (ID != &NoPassID) { + // Run a separate pass to collect block placement statistics. + if (EnableBlockPlacementStats) + addPass(MachineBlockPlacementStatsID); + + printAndVerify("After machine block placement."); } } diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index bbc7ce2d0a42..9c5c029000c0 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -39,7 +39,7 @@ // => // v1 = bitcast v0 // = v0 -// +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "peephole-opt" @@ -68,7 +68,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); -STATISTIC(NumImmFold, "Number of move immediate foled"); +STATISTIC(NumImmFold, "Number of move immediate folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -109,22 +109,19 @@ namespace { } char PeepholeOptimizer::ID = 0; +char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -FunctionPass *llvm::createPeepholeOptimizerPass() { - return new PeepholeOptimizer(); -} - /// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. 
it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the /// result. -/// +/// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. @@ -134,7 +131,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; - + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; @@ -240,6 +237,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (PHIBBs.count(UseMBB)) continue; + // About to add uses of DstReg, clear DstReg's kill flags. + if (!Changed) + MRI->clearKillFlags(DstReg); + unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVR) @@ -292,7 +293,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, assert(Def && Src && "Malformed bitcast instruction!"); MachineInstr *DefMI = MRI->getVRegDef(Src); - if (!DefMI || !DefMI->getDesc().isBitcast()) + if (!DefMI || !DefMI->isBitcast()) return false; unsigned SrcSrc = 0; @@ -353,7 +354,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isMoveImmediate()) + if (!MI->isMoveImmediate()) return false; if (MCID.getNumDefs() != 1) return false; @@ -363,7 +364,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, ImmDefRegs.insert(Reg); return true; } - + return false; } @@ -395,7 +396,7 @@ bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; - + TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); @@ -408,7 +409,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; - + bool SeenMoveImm = false; LocalMIs.clear(); ImmDefRegs.clear(); @@ -428,17 +429,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - - if (MCID.isBitcast()) { + if (MI->isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; MII = First ? I->begin() : llvm::next(PMII); continue; - } - } else if (MCID.isCompare()) { + } + } else if (MI->isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. 
LocalMIs.erase(MI); diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index c73e87733cb4..24d3e5ab0c9d 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,6 @@ #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" #include "RegisterClassInfo.h" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" @@ -45,7 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); @@ -82,16 +81,15 @@ namespace { AliasAnalysis *AA; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; - CodeGenOpt::Level OptLevel; public: static char ID; - PostRAScheduler(CodeGenOpt::Level ol) : - MachineFunctionPass(ID), OptLevel(ol) {} + PostRAScheduler() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -99,10 +97,6 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { - return "Post RA top-down list latency scheduler"; - } - bool runOnMachineFunction(MachineFunction &Fn); }; char PostRAScheduler::ID = 0; @@ -130,36 +124,49 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// KillIndices - The index of the most recent kill (proceding bottom-up), - /// or ~0u if the register is not live. - std::vector KillIndices; + /// LiveRegs - true if the register is live. + BitVector LiveRegs; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector Sequence; public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, AliasAnalysis *AA, const RegisterClassInfo&, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl &CriticalPathRCs); + SmallVectorImpl &CriticalPathRCs); ~SchedulePostRATDList(); - /// StartBlock - Initialize register live-range state for scheduling in + /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void StartBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); /// Schedule - Schedule the instruction range using list scheduling. /// - void Schedule(); + void schedule(); + + void EmitSchedule(); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// void Observe(MachineInstr *MI, unsigned Count); - /// FinishBlock - Clean up register live-range state. + /// finishBlock - Clean up register live-range state. 
/// - void FinishBlock(); + void finishBlock(); /// FixupKills - Fix register kill flags that have been made /// invalid due to scheduling @@ -177,16 +184,23 @@ namespace { // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + void dumpSchedule() const; }; } +char &llvm::PostRASchedulerID = PostRAScheduler::ID; + +INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", + "Post RA top-down list latency scheduler", false, false) + SchedulePostRATDList::SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), - KillIndices(TRI->getNumRegs()) + SmallVectorImpl &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA), + LiveRegs(TRI->getNumRegs()) { const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); @@ -204,16 +218,48 @@ SchedulePostRATDList::~SchedulePostRATDList() { delete AntiDepBreak; } +/// Initialize state associated with the next scheduling region. +void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis(); MachineDominatorTree &MDT = getAnalysis(); AliasAnalysis *AA = &getAnalysis(); + TargetPassConfig *PassConfig = &getAnalysis(); + RegClassInfo.runOnMachineFunction(Fn); // Check for explicit enable/disable of post-ra scheduling. - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; - SmallVector CriticalPathRCs; + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + TargetSubtargetInfo::ANTIDEP_NONE; + SmallVector CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) return false; @@ -221,7 +267,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. 
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget(); - if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) + if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode, + CriticalPathRCs)) return false; } @@ -248,13 +295,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << - ":BB#" << MBB->getNumber() << " ***\n"; + dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + << ":BB#" << MBB->getNumber() << " ***\n"; } #endif // Initialize register live-range state for scheduling in this block. - Scheduler.StartBlock(MBB); + Scheduler.startBlock(MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. @@ -262,8 +309,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { MachineInstr *MI = llvm::prior(I); - if (TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.Run(MBB, I, Current, CurrentCount); + // Calls are not scheduling boundaries before register allocation, but + // post-ra we don't gain anything by scheduling across calls since we + // don't need to worry about register pressure. + if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { + Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; @@ -271,15 +323,19 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } I = MI; --Count; + if (MI->isBundle()) + Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); // Clean up register live-range state. - Scheduler.FinishBlock(); + Scheduler.finishBlock(); // Update register kills Scheduler.FixupKills(MBB); @@ -291,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// -void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Call the superclass. - ScheduleDAGInstrs::StartBlock(BB); + ScheduleDAGInstrs::startBlock(BB); // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); @@ -303,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. /// -void SchedulePostRATDList::Schedule() { +void SchedulePostRATDList::schedule() { // Build the scheduling graph. - BuildSchedGraph(AA); + buildSchedGraph(AA); if (AntiDepBreak != NULL) { unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, - InsertPosIndex, DbgValues); + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); if (Broken != 0) { // We made changes. Update the dependency graph. 
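The runOnMachineFunction hunk above now carves each block into scheduling regions bottom-up, cutting at calls as well as the target's other scheduling boundaries, since post-RA there is nothing to gain from scheduling across a call. The following is a self-contained sketch of that carving pattern only, with toy types and a print-only stand-in for the scheduler (Insn, scheduleRegion and the sample block are made up, not LLVM names):

    // Standalone sketch (not LLVM code) of the region-carving loop: walk a block
    // bottom-up, cut a region at every boundary instruction (post-RA this
    // includes calls), and hand each region to a scheduler hook.
    #include <iostream>
    #include <string>
    #include <vector>

    struct Insn {
      std::string Name;
      bool IsBoundary; // e.g. a call or label that must not be scheduled across
    };

    // Stand-in for the scheduler: here it only prints the region [Begin, End).
    static void scheduleRegion(const std::vector<Insn> &Block, size_t Begin,
                               size_t End) {
      std::cout << "region:";
      for (size_t I = Begin; I != End; ++I)
        std::cout << ' ' << Block[I].Name;
      std::cout << '\n';
    }

    int main() {
      std::vector<Insn> Block = {{"load", false},
                                 {"add", false},
                                 {"call", true},
                                 {"mul", false},
                                 {"store", false}};

      // Walk bottom-up; a boundary terminates the current region and is itself
      // excluded from scheduling, as in the patch's loop.
      size_t RegionEnd = Block.size();
      for (size_t I = Block.size(); I-- > 0;) {
        if (Block[I].IsBoundary) {
          scheduleRegion(Block, I + 1, RegionEnd);
          RegionEnd = I; // the boundary itself is not scheduled
        }
      }
      // Final region: from the start of the block down to the last boundary seen.
      scheduleRegion(Block, 0, RegionEnd);
      return 0;
    }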
@@ -319,11 +375,8 @@ void SchedulePostRATDList::Schedule() { // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); NumFixedAnti += Broken; } @@ -343,38 +396,36 @@ void SchedulePostRATDList::Schedule() { /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { if (AntiDepBreak != NULL) - AntiDepBreak->Observe(MI, Count, InsertPosIndex); + AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// -void SchedulePostRATDList::FinishBlock() { +void SchedulePostRATDList::finishBlock() { if (AntiDepBreak != NULL) AntiDepBreak->FinishBlock(); // Call the superclass. - ScheduleDAGInstrs::FinishBlock(); + ScheduleDAGInstrs::finishBlock(); } /// StartBlockForKills - Initialize register live-range state for updating kills /// void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { - // Initialize the indices to indicate that no registers are live. - for (unsigned i = 0; i < TRI->getNumRegs(); ++i) - KillIndices[i] = ~0u; + // Start with no live registers. + LiveRegs.reset(); // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; - KillIndices[Reg] = BB->size(); + LiveRegs.set(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = BB->size(); - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } else { @@ -384,12 +435,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - KillIndices[Reg] = BB->size(); + LiveRegs.set(Reg); // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = BB->size(); - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } } @@ -404,7 +454,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, } // If MO itself is live, clear the kill flag... 
- if (KillIndices[MO.getReg()] != ~0u) { + if (LiveRegs.test(MO.getReg())) { MO.setIsKill(false); return false; } @@ -414,9 +464,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); - for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg); + for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg); *Subreg; ++Subreg) { - if (KillIndices[*Subreg] != ~0u) { + if (LiveRegs.test(*Subreg)) { MI->addOperand(MachineOperand::CreateReg(*Subreg, true /*IsDef*/, true /*IsImp*/, @@ -437,7 +487,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - std::set killedRegs; + BitVector killedRegs(TRI->getNumRegs()); BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); @@ -455,6 +505,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // are completely defined. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -462,19 +514,18 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - KillIndices[Reg] = ~0u; + LiveRegs.reset(Reg); // Repeat for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = ~0u; - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.reset(*Subreg); } // Examine all used registers and set/clear kill flag. When a // register is used multiple times we only set the kill flag on // the first use. - killedRegs.clear(); + killedRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; @@ -482,12 +533,12 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if ((Reg == 0) || ReservedRegs.test(Reg)) continue; bool kill = false; - if (killedRegs.find(Reg) == killedRegs.end()) { + if (!killedRegs.test(Reg)) { kill = true; // A register is not killed if any subregs are live... 
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { - if (KillIndices[*Subreg] != ~0u) { + if (LiveRegs.test(*Subreg)) { kill = false; break; } @@ -496,7 +547,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // If subreg is not live, then register is killed if it became // live in this instruction if (kill) - kill = (KillIndices[Reg] == ~0u); + kill = !LiveRegs.test(Reg); } if (MO.isKill() != kill) { @@ -506,7 +557,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { DEBUG(MI->dump()); } - killedRegs.insert(Reg); + killedRegs.set(Reg); } // Mark any used register (that is not using undef) and subregs as @@ -517,12 +568,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if ((Reg == 0) || ReservedRegs.test(Reg)) continue; - KillIndices[Reg] = Count; + LiveRegs.set(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - KillIndices[*Subreg] = Count; - } + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + LiveRegs.set(*Subreg); } } } @@ -585,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { ReleaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue.ScheduledNode(SU); + AvailableQueue.scheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down @@ -699,14 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); -#endif + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG } -//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; -FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) { - return new PostRAScheduler(OptLevel); + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->splice(RegionEnd, BB, FirstDbgValue); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + BB->splice(RegionEnd, BB, SU->getInstr()); + else + // Null SUnit* is a noop. + TII->insertNoop(*BB, RegionEnd); + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + RegionBegin = prior(RegionEnd); + } + + // Reinsert any remaining debug_values. 
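The FixupKills/StartBlockForKills rework above drops the per-register KillIndices array in favor of a bit vector of live registers maintained while walking the block bottom-up. Below is a minimal standalone model of that bookkeeping, using std::bitset instead of llvm::BitVector and invented instruction records; it only illustrates the clear-defs, mark-first-bottom-up-use-as-kill pattern, not the real pass:

    // Self-contained model (not LLVM) of bottom-up liveness bookkeeping with a
    // bit vector: clear bits at defs, and report the first bottom-up use of a
    // dead register as that register's kill.
    #include <bitset>
    #include <iostream>
    #include <string>
    #include <vector>

    constexpr std::size_t NumRegs = 8;

    struct Insn {
      std::string Name;
      std::vector<unsigned> Defs;
      std::vector<unsigned> Uses;
    };

    int main() {
      // r1 = r0 + r0 ; r2 = r1 * r3
      std::vector<Insn> Block = {{"add", {1}, {0, 0}}, {"mul", {2}, {1, 3}}};
      std::bitset<NumRegs> LiveRegs; // live-out of the block: assume only r2
      LiveRegs.set(2);

      for (std::size_t I = Block.size(); I-- > 0;) {
        for (unsigned D : Block[I].Defs)
          LiveRegs.reset(D); // defined here, so dead in the code above
        for (unsigned U : Block[I].Uses) {
          if (!LiveRegs.test(U))
            std::cout << Block[I].Name << " kills r" << U << '\n';
          LiveRegs.set(U); // used here, so live in the code above
        }
      }
      return 0;
    }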
+ for (std::vector >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; } diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index b1d8c9760225..1ad3479afb4c 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,6 +26,8 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; +char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; + INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) INITIALIZE_PASS_DEPENDENCY(LiveVariables) @@ -36,7 +38,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved(); AU.addPreserved(); - AU.addRequired(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); AU.addPreservedID(TwoAddressInstructionPassID); @@ -50,10 +51,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, SmallSet &ImpDefRegs) { switch(OpIdx) { case 1: - return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isCopy() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); case 2: - return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || ImpDefRegs.count(MI->getOperand(0).getReg())); default: return false; } @@ -66,7 +67,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg, MachineOperand &MO1 = MI->getOperand(1); if (MO1.getReg() != Reg) return false; - if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg())) return true; return false; } @@ -87,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { TII = fn.getTarget().getInstrInfo(); TRI = fn.getTarget().getRegisterInfo(); MRI = &fn.getRegInfo(); - LV = &getAnalysis(); + LV = getAnalysisIfAvailable(); SmallSet ImpDefRegs; SmallVector ImpDefMIs; @@ -105,23 +106,24 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { - if (MI->getOperand(0).getSubReg()) + ImpDefMIs.push_back(MI); + // Is this a sub-register read-modify-write? + if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) + for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } - ImpDefMIs.push_back(MI); continue; } // Eliminate %reg1032:sub = COPY undef. 
- if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { - if (MO.isKill()) { + if (LV && MO.isKill()) { LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } @@ -140,7 +142,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) + if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -155,8 +157,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(MI); + if (LV) { + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); + vi.removeKill(MI); + } } ChangedToImpDef = true; Changed = true; @@ -172,10 +176,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other uses of + // Make sure other reads of Reg are also marked . for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); @@ -265,7 +269,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { } // Update LiveVariables varinfo if the instruction is a kill. - if (isKill) { + if (LV && isKill) { LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 32c932552bed..458915ea5d93 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -45,24 +45,22 @@ using namespace llvm; char PEI::ID = 0; +char &llvm::PrologEpilogCodeInserterID = PEI::ID; INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(PEI, "prologepilog", - "Prologue/Epilogue Insertion", false, false) + "Prologue/Epilogue Insertion & Frame Finalization", + false, false) STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); -/// createPrologEpilogCodeInserter - This function returns a pass that inserts -/// prolog and epilog code, and eliminates abstract frame references. -/// -FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -71,6 +69,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -125,6 +125,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); + // Clear any vregs created by virtual scavenging. + Fn.getRegInfo().clearVirtRegs(); + delete RS; clearAllSets(); return true; @@ -207,7 +210,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; @@ -224,17 +227,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg)); - } else { - for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); - *AliasSet; ++AliasSet) { // Check alias registers too. - if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg)); - break; - } - } } } @@ -332,7 +327,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); @@ -426,11 +421,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the // return sequence. - if (! I->getDesc().isTerminator()) { + if (! I->isTerminator()) { ++I; } else { MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; } } @@ -698,7 +693,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) + if (!I->empty() && I->back().isReturn()) TFI.emitEpilogue(Fn, *I); } @@ -706,7 +701,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (EnableSegmentedStacks) + if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); } @@ -813,6 +808,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. 
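The calculateCalleeSavedRegisters hunk above folds the old alias-set loop into a single isPhysRegOrOverlapUsed() query: a callee-saved register needs a save/restore if it or any register overlapping it is used. Here is a small self-contained model of that check with a made-up overlap table (the register names and table are illustrative, not real target data):

    // Self-contained model (not the LLVM implementation) of the callee-saved
    // register scan: a register must be saved if it or any overlapping register
    // is used, which is what isPhysRegOrOverlapUsed() answers in one query.
    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Reg = std::string;

    // Made-up overlap table: each register and the registers it overlaps.
    static const std::map<Reg, std::vector<Reg>> Overlaps = {
        {"RBX", {"EBX", "BX", "BL"}}, {"R12", {"R12D"}}, {"R13", {"R13D"}}};

    static bool isPhysRegOrOverlapUsed(const std::set<Reg> &Used, const Reg &R) {
      if (Used.count(R))
        return true;
      auto It = Overlaps.find(R);
      if (It != Overlaps.end())
        for (const Reg &A : It->second)
          if (Used.count(A))
            return true;
      return false;
    }

    int main() {
      std::set<Reg> UsedInFunction = {"EBX", "R13"};        // only sub-register uses
      std::vector<Reg> CalleeSaved = {"RBX", "R12", "R13"}; // target's CSR list

      for (const Reg &R : CalleeSaved)
        if (isPhysRegOrOverlapUsed(UsedInFunction, R))
          std::cout << "must save/restore " << R << '\n'; // prints RBX and R13
      return 0;
    }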
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h index e2391591ad06..0d140a9bb481 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h @@ -40,10 +40,6 @@ namespace llvm { initializePEIPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const { - return "Prolog/Epilog Insertion & Frame Finalization"; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp index 73b66d868f3d..49599b3ab980 100644 --- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp +++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -87,7 +87,6 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { this == getJumpTable()) return true; llvm_unreachable("Unknown PseudoSourceValue!"); - return false; } bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { @@ -97,7 +96,6 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { this == getJumpTable()) return false; llvm_unreachable("Unknown PseudoSourceValue!"); - return true; } bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp new file mode 100644 index 000000000000..b00eceb17f11 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -0,0 +1,280 @@ +//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class which provides comon functionality +// for LiveIntervalUnion-based register allocators. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +// Temporary verification option until we can put verification inside +// MachineVerifier. 
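The -verify-regalloc option declared just below appears to use LLVM's external-storage form of cl::opt, writing through cl::location() into RegAllocBase::VerifyEnabled so that any code can test the flag without seeing the option object. A minimal sketch of that pattern, assuming the CommandLine API of this era and using hypothetical names (MyPassBase, -verify-mypass):

    // Minimal sketch of the external-storage cl::opt pattern, with hypothetical
    // names; the flag writes through cl::location() into a bool owned elsewhere.
    #include "llvm/Support/CommandLine.h"

    struct MyPassBase {
      static bool VerifyEnabled; // flipped by the -verify-mypass flag below
    };
    bool MyPassBase::VerifyEnabled = false;

    static llvm::cl::opt<bool, true> // 'true' selects external storage
    MyVerifyFlag("verify-mypass",
                 llvm::cl::location(MyPassBase::VerifyEnabled),
                 llvm::cl::desc("Verify during my pass"));

    int main(int argc, char **argv) {
      llvm::cl::ParseCommandLineOptions(argc, argv);
      return MyPassBase::VerifyEnabled ? 0 : 1; // exit status reflects the flag
    }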
+static cl::opt +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +#ifndef NDEBUG +// Verify each LiveIntervalUnion. +void RegAllocBase::verify() { + LiveVirtRegBitSet VisitedVRegs; + OwningArrayPtr + unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); + + // Verify disjoint unions. + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); + LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; + PhysReg2LiveUnion[PhysReg].verify(VRegs); + // Union + intersection test could be done efficiently in one pass, but + // don't add a method to SparseBitVector unless we really need it. + assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); + VisitedVRegs |= VRegs; + } + + // Verify vreg coverage. + for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); + liItr != liEnd; ++liItr) { + unsigned reg = liItr->first; + if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; + if (!VRM->hasPhys(reg)) continue; // spilled? + unsigned PhysReg = VRM->getPhys(reg); + if (!unionVRegs[PhysReg].test(reg)) { + dbgs() << "LiveVirtReg " << reg << " not in union " << + TRI->getName(PhysReg) << "\n"; + llvm_unreachable("unallocated live vreg"); + } + } + // FIXME: I'm not sure how to verify spilled intervals. +} +#endif //!NDEBUG + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Instantiate a LiveIntervalUnion for each physical register. +void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, + unsigned NRegs) { + NumRegs = NRegs; + Array = + static_cast(malloc(sizeof(LiveIntervalUnion)*NRegs)); + for (unsigned r = 0; r != NRegs; ++r) + new(Array + r) LiveIntervalUnion(r, allocator); +} + +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { + NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + MRI->freezeReservedRegs(vrm.getMachineFunction()); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } +} + +void RegAllocBase::LiveUnionArray::clear() { + if (!Array) + return; + for (unsigned r = 0; r != NumRegs; ++r) + Array[r].~LiveIntervalUnion(); + free(Array); + NumRegs = 0; + Array = 0; +} + +void RegAllocBase::releaseMemory() { + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); +} + +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. 
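seedLiveRegs() below fills the priority queue that allocatePhysRegs() then drains, handing each unassigned virtual register to selectOrSplit() and either recording an assignment or re-queueing the split products. The following is a simplified, self-contained model of that seed-and-drain driver; the types, the two-register budget and the selectOrSplit stand-in are all invented for illustration:

    // Simplified, self-contained model (not LLVM) of the seed-and-drain driver:
    // virtual registers are queued by spill weight, then assigned or spilled one
    // at a time by a selectOrSplit-style decision.
    #include <iostream>
    #include <queue>
    #include <vector>

    struct VirtReg {
      int Id;
      float SpillWeight;
    };

    struct ByWeight {
      bool operator()(const VirtReg &A, const VirtReg &B) const {
        return A.SpillWeight < B.SpillWeight; // heavier intervals come out first
      }
    };

    // Invented policy: pretend the target has only two free physical registers.
    static int selectOrSplit(const VirtReg &VR, int &FreeRegs) {
      if (FreeRegs > 0) {
        --FreeRegs;
        return VR.Id + 100; // fake physical register number
      }
      return -1; // caller must spill (or split) this interval
    }

    int main() {
      std::priority_queue<VirtReg, std::vector<VirtReg>, ByWeight> Queue;
      for (VirtReg VR : {VirtReg{0, 1.5f}, VirtReg{1, 8.0f}, VirtReg{2, 0.3f}})
        Queue.push(VR); // seeding phase

      int FreeRegs = 2;
      while (!Queue.empty()) { // drain phase
        VirtReg VR = Queue.top();
        Queue.pop();
        int PhysReg = selectOrSplit(VR, FreeRegs);
        if (PhysReg >= 0)
          std::cout << "vreg " << VR.Id << " -> physreg " << PhysReg << '\n';
        else
          std::cout << "vreg " << VR.Id << " spilled\n";
      }
      return 0;
    }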
+void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { + unsigned RegNum = I->first; + LiveInterval &VirtReg = *I->second; + if (TargetRegisterInfo::isPhysicalRegister(RegNum)) + PhysReg2LiveUnion[RegNum].unify(VirtReg); + else + enqueue(&VirtReg); + } +} + +void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << '\n'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); + PhysReg2LiveUnion[PhysReg].unify(VirtReg); + ++NumAssigned; +} + +void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << '\n'); + assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); + PhysReg2LiveUnion[PhysReg].extract(VirtReg); + VRM->clearVirt(VirtReg.reg); + ++NumUnassigned; +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + seedLiveRegs(); + + // Continue assigning vregs one at a time to available physical registers. + while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + invalidateVirtRegs(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. + DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << '\n'); + typedef SmallVector VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! + const char *Msg = "ran out of registers during register allocation"; + // Probably caused by an inline asm. + MachineInstr *MI; + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); + (MI = I.skipInstruction());) + if (MI->isInlineAsm()) + break; + if (MI) + MI->emitError(Msg); + else + report_fatal_error(Msg); + // Keep going after reporting the error. 
+ VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; + } + + if (AvailablePhysReg) + assign(*VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval *SplitVirtReg = *I; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + enqueue(SplitVirtReg); + ++NumNewQueued; + } + } +} + +// Check if this live virtual register interferes with a physical register. If +// not, then check for interference on each register that aliases with the +// physical register. Return the interfering register. +unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + if (query(VirtReg, *AliasI).checkInterference()) + return *AliasI; + return 0; +} + +// Add newly allocated physical registers to the MBB live in sets. +void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { + NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + + LiveIntervalUnion::SegmentIter SI; + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; + if (LiveUnion.empty()) + continue; + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); + MachineFunction::iterator MBB = llvm::next(MF->begin()); + MachineFunction::iterator MFE = MF->end(); + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.setMap(LiveUnion.getMap()); + SI.find(Start); + while (SI.valid()) { + if (SI.start() <= Start) { + if (!MBB->isLiveIn(PhysReg)) + MBB->addLiveIn(PhysReg); + DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' + << PrintReg(SI.value()->reg, TRI)); + } else if (SI.start() > Stop) + MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); + if (++MBB == MFE) + break; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.advanceTo(Start); + } + DEBUG(dbgs() << '\n'); + } +} + diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index 031642117efc..072fe2bdb656 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -49,11 +49,6 @@ class VirtRegMap; class LiveIntervals; class Spiller; -// Forward declare a priority queue of live virtual registers. If an -// implementation needs to prioritize by anything other than spill weight, then -// this will become an abstract base class with virtual calls to push/get. -class LiveVirtRegQueue; - /// RegAllocBase provides the register allocation driver and interface that can /// be extended to add interesting heuristics. /// @@ -67,7 +62,6 @@ class RegAllocBase { // registers may have changed. unsigned UserTag; -protected: // Array of LiveIntervalUnions indexed by physical register. 
class LiveUnionArray { unsigned NumRegs; @@ -88,17 +82,19 @@ class RegAllocBase { } }; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - VirtRegMap *VRM; - LiveIntervals *LIS; - RegisterClassInfo RegClassInfo; LiveUnionArray PhysReg2LiveUnion; // Current queries, one per physreg. They must be reinitialized each time we // query on a new live virtual register. OwningArrayPtr Queries; +protected: + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + RegisterClassInfo RegClassInfo; + RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} virtual ~RegAllocBase() {} @@ -115,16 +111,17 @@ class RegAllocBase { return Queries[PhysReg]; } + // Get direct access to the underlying LiveIntervalUnion for PhysReg. + LiveIntervalUnion &getLiveUnion(unsigned PhysReg) { + return PhysReg2LiveUnion[PhysReg]; + } + // Invalidate all cached information about virtual registers - live ranges may // have changed. void invalidateVirtRegs() { ++UserTag; } // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. - // - // If an implementation wants to override the LiveInterval comparator, we - // should modify this interface to allow passing in an instance derived from - // LiveVirtRegQueue. void allocatePhysRegs(); // Get a temporary reference to a Spiller instance. @@ -160,12 +157,6 @@ class RegAllocBase { /// allocation is making progress. void unassign(LiveInterval &VirtReg, unsigned PhysReg); - // Helper for spilling all live virtual registers currently unified under preg - // that interfere with the most recently queried lvr. Return true if spilling - // was successful, and append any new spilled/split intervals to splitLVRs. - bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs); - /// addMBBLiveIns - Add physreg liveins to basic blocks. 
void addMBBLiveIns(MachineFunction *); @@ -183,9 +174,6 @@ class RegAllocBase { private: void seedLiveRegs(); - - void spillReg(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs); }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 5496d69fd3df..77ee3148f31a 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocBasic.cpp - basic register allocator ----------------------===// +//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===// // // The LLVM Compiler Infrastructure // @@ -15,18 +15,15 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "LiveIntervalUnion.h" -#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -37,35 +34,17 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#ifndef NDEBUG -#include "llvm/ADT/SparseBitVector.h" -#endif #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include #include using namespace llvm; -STATISTIC(NumAssigned , "Number of registers assigned"); -STATISTIC(NumUnassigned , "Number of registers unassigned"); -STATISTIC(NumNewQueued , "Number of new live ranges queued"); - static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", createBasicRegisterAllocator); -// Temporary verification option until we can put verification inside -// MachineVerifier. -static cl::opt -VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), - cl::desc("Verify during register allocation")); - -const char *RegAllocBase::TimerGroupName = "Register Allocation"; -bool RegAllocBase::VerifyEnabled = false; - namespace { struct CompSpillWeight { bool operator()(LiveInterval *A, LiveInterval *B) const { @@ -93,6 +72,11 @@ class RABasic : public MachineFunctionPass, public RegAllocBase std::auto_ptr SpillerInstance; std::priority_queue, CompSpillWeight> Queue; + + // Scratch space. Allocated here to avoid repeated malloc calls in + // selectOrSplit(). + BitVector UsableRegs; + public: RABasic(); @@ -128,6 +112,15 @@ class RABasic : public MachineFunctionPass, public RegAllocBase /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. 
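spillInterferences(), declared just below and defined further down in RegAllocBasic.cpp, first decides whether every interfering interval on the candidate physical register can be evicted and only then mutates any state. The sketch below models that two-pass shape; the extra "lighter than the candidate" test is an assumption about the eviction policy, the patch text only requires that all interferences be spillable:

    // Self-contained model (not the LLVM code) of a two-pass eviction check:
    // decide first, without mutating anything, whether every interfering interval
    // can go; only then unassign them all and hand them back to the queue. The
    // weight comparison below is an assumed policy detail.
    #include <iostream>
    #include <vector>

    struct Interval {
      int Id;
      float Weight;
      bool Spillable;
    };

    static bool spillInterferences(const Interval &Candidate,
                                   std::vector<Interval> &AssignedHere,
                                   std::vector<Interval> &ReQueue) {
      // Pass 1: decide without mutating anything.
      for (const Interval &Intf : AssignedHere)
        if (!Intf.Spillable || Intf.Weight >= Candidate.Weight)
          return false;
      // Pass 2: evict everything and re-queue it for splitting/spilling.
      for (const Interval &Intf : AssignedHere)
        ReQueue.push_back(Intf);
      AssignedHere.clear();
      return true;
    }

    int main() {
      Interval Candidate{7, 5.0f, true};
      std::vector<Interval> AssignedHere = {{1, 1.0f, true}, {2, 2.5f, true}};
      std::vector<Interval> ReQueue;
      if (spillInterferences(Candidate, AssignedHere, ReQueue))
        std::cout << "evicted " << ReQueue.size()
                  << " intervals; the register is free\n";
      return 0;
    }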
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs); + + void spillReg(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs); + static char ID; }; @@ -139,8 +132,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -157,9 +150,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -178,204 +168,10 @@ void RABasic::releaseMemory() { RegAllocBase::releaseMemory(); } -#ifndef NDEBUG -// Verify each LiveIntervalUnion. -void RegAllocBase::verify() { - LiveVirtRegBitSet VisitedVRegs; - OwningArrayPtr - unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); - - // Verify disjoint unions. - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); - LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; - PhysReg2LiveUnion[PhysReg].verify(VRegs); - // Union + intersection test could be done efficiently in one pass, but - // don't add a method to SparseBitVector unless we really need it. - assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); - VisitedVRegs |= VRegs; - } - - // Verify vreg coverage. - for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); - liItr != liEnd; ++liItr) { - unsigned reg = liItr->first; - if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; - if (!VRM->hasPhys(reg)) continue; // spilled? - unsigned PhysReg = VRM->getPhys(reg); - if (!unionVRegs[PhysReg].test(reg)) { - dbgs() << "LiveVirtReg " << reg << " not in union " << - TRI->getName(PhysReg) << "\n"; - llvm_unreachable("unallocated live vreg"); - } - } - // FIXME: I'm not sure how to verify spilled intervals. -} -#endif //!NDEBUG - -//===----------------------------------------------------------------------===// -// RegAllocBase Implementation -//===----------------------------------------------------------------------===// - -// Instantiate a LiveIntervalUnion for each physical register. 
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, - unsigned NRegs) { - NumRegs = NRegs; - Array = - static_cast(malloc(sizeof(LiveIntervalUnion)*NRegs)); - for (unsigned r = 0; r != NRegs; ++r) - new(Array + r) LiveIntervalUnion(r, allocator); -} - -void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { - NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); - TRI = &vrm.getTargetRegInfo(); - MRI = &vrm.getRegInfo(); - VRM = &vrm; - LIS = &lis; - RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); - - const unsigned NumRegs = TRI->getNumRegs(); - if (NumRegs != PhysReg2LiveUnion.numRegs()) { - PhysReg2LiveUnion.init(UnionAllocator, NumRegs); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); - } -} - -void RegAllocBase::LiveUnionArray::clear() { - if (!Array) - return; - for (unsigned r = 0; r != NumRegs; ++r) - Array[r].~LiveIntervalUnion(); - free(Array); - NumRegs = 0; - Array = 0; -} - -void RegAllocBase::releaseMemory() { - for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) - PhysReg2LiveUnion[r].clear(); -} - -// Visit all the live registers. If they are already assigned to a physical -// register, unify them with the corresponding LiveIntervalUnion, otherwise push -// them on the priority queue for later assignment. -void RegAllocBase::seedLiveRegs() { - NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { - unsigned RegNum = I->first; - LiveInterval &VirtReg = *I->second; - if (TargetRegisterInfo::isPhysicalRegister(RegNum)) - PhysReg2LiveUnion[RegNum].unify(VirtReg); - else - enqueue(&VirtReg); - } -} - -void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << '\n'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); - PhysReg2LiveUnion[PhysReg].unify(VirtReg); - ++NumAssigned; -} - -void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << '\n'); - assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); - PhysReg2LiveUnion[PhysReg].extract(VirtReg); - VRM->clearVirt(VirtReg.reg); - ++NumUnassigned; -} - -// Top-level driver to manage the queue of unassigned VirtRegs and call the -// selectOrSplit implementation. -void RegAllocBase::allocatePhysRegs() { - seedLiveRegs(); - - // Continue assigning vregs one at a time to available physical registers. - while (LiveInterval *VirtReg = dequeue()) { - assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); - - // Unused registers can appear when the spiller coalesces snippets. - if (MRI->reg_nodbg_empty(VirtReg->reg)) { - DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); - LIS->removeInterval(VirtReg->reg); - continue; - } - - // Invalidate all interference queries, live ranges could have changed. - invalidateVirtRegs(); - - // selectOrSplit requests the allocator to return an available physical - // register if possible and populate a list of new live intervals that - // result from splitting. 
- DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); - typedef SmallVector VirtRegVec; - VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); - - if (AvailablePhysReg == ~0u) { - // selectOrSplit failed to find a register! - const char *Msg = "ran out of registers during register allocation"; - // Probably caused by an inline asm. - MachineInstr *MI; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); - (MI = I.skipInstruction());) - if (MI->isInlineAsm()) - break; - if (MI) - MI->emitError(Msg); - else - report_fatal_error(Msg); - // Keep going after reporting the error. - VRM->assignVirt2Phys(VirtReg->reg, - RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); - continue; - } - - if (AvailablePhysReg) - assign(*VirtReg, AvailablePhysReg); - - for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); - I != E; ++I) { - LiveInterval *SplitVirtReg = *I; - assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); - if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { - DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); - LIS->removeInterval(SplitVirtReg->reg); - continue; - } - DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); - assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && - "expect split value in virtual register"); - enqueue(SplitVirtReg); - ++NumNewQueued; - } - } -} - -// Check if this live virtual register interferes with a physical register. If -// not, then check for interference on each register that aliases with the -// physical register. Return the interfering register. -unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - if (query(VirtReg, *AliasI).checkInterference()) - return *AliasI; - return 0; -} - -// Helper for spillInteferences() that spills all interfering vregs currently +// Helper for spillInterferences() that spills all interfering vregs currently // assigned to this physical register. -void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs) { +void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs) { LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); assert(Q.seenAllInterferences() && "need collectInterferences()"); const SmallVectorImpl &PendingSpills = Q.interferingVRegs(); @@ -391,7 +187,7 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval. - LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the @@ -402,14 +198,13 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. -bool -RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, +bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. 
unsigned NumInterferences = 0; // Collect interferences assigned to any alias of the physical register. - for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { + for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); NumInterferences += QAlias.collectInterferingVRegs(); if (QAlias.seenUnspillableVReg()) { @@ -421,52 +216,11 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, assert(NumInterferences > 0 && "expect interference"); // Spill each interfering vreg allocated to PhysReg or an alias. - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) spillReg(VirtReg, *AliasI, SplitVRegs); return true; } -// Add newly allocated physical registers to the MBB live in sets. -void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { - NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - SlotIndexes *Indexes = LIS->getSlotIndexes(); - if (MF->size() <= 1) - return; - - LiveIntervalUnion::SegmentIter SI; - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; - if (LiveUnion.empty()) - continue; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); - MachineFunction::iterator MBB = llvm::next(MF->begin()); - MachineFunction::iterator MFE = MF->end(); - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.setMap(LiveUnion.getMap()); - SI.find(Start); - while (SI.valid()) { - if (SI.start() <= Start) { - if (!MBB->isLiveIn(PhysReg)) - MBB->addLiveIn(PhysReg); - DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' - << PrintReg(SI.value()->reg, TRI)); - } else if (SI.start() > Stop) - MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); - if (++MBB == MFE) - break; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.advanceTo(Start); - } - DEBUG(dbgs() << '\n'); - } -} - - -//===----------------------------------------------------------------------===// -// RABasic Implementation -//===----------------------------------------------------------------------===// - // Driver for the register assignment and splitting heuristics. // Manages iteration over the LiveIntervalUnions. // @@ -481,6 +235,10 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &SplitVRegs) { + // Check for register mask interference. When live ranges cross calls, the + // set of usable registers is reduced to the callee-saved ones. + bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs); + // Populate a list of physical register spill candidates. SmallVector PhysRegSpillCands; @@ -491,6 +249,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, ++I) { unsigned PhysReg = *I; + // If PhysReg is clobbered by a register mask, it isn't useful for + // allocation or spilling. + if (CrossRegMasks && !UsableRegs.test(PhysReg)) + continue; + // Check interference and as a side effect, intialize queries for this // VirtReg and its aliases. unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); @@ -498,9 +261,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, // Found an available register. 
return PhysReg; } - Queries[interfReg].collectInterferingVRegs(1); - LiveInterval *interferingVirtReg = - Queries[interfReg].interferingVRegs().front(); + LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg); + IntfQ.collectInterferingVRegs(1); + LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front(); // The current VirtReg must either be spillable, or one of its interferences // must have less spill weight. @@ -524,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs); + LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -579,7 +342,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Write out new DBG_VALUE instructions. getAnalysis().emitDebugValues(VRM); - // The pass output is in VirtRegMap. Release all the transient data. + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); releaseMemory(); return true; diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index b36a445291b7..e09b7f8d26be 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include @@ -49,10 +50,7 @@ namespace { public: static char ID; RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), - isBulkSpilling(false) { - initializePHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + isBulkSpilling(false) {} private: const TargetMachine *TM; MachineFunction *MF; @@ -71,16 +69,20 @@ namespace { // Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse; // Last instr to use reg. + unsigned VirtReg; // Virtual register number. unsigned PhysReg; // Currently held here. unsigned short LastOpNum; // OpNum on LastUse. bool Dirty; // Register needs spill. - LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), - Dirty(false) {} + explicit LiveReg(unsigned v) + : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} + + unsigned getSparseSetKey() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } }; - typedef DenseMap LiveRegMap; - typedef LiveRegMap::value_type LiveRegEntry; + typedef SparseSet LiveRegMap; // LiveVirtRegs - This map contains entries for each virtual register // that is currently available in a physical register. 
@@ -137,8 +139,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -159,14 +159,23 @@ namespace { void usePhysReg(MachineOperand&); void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); unsigned calcSpillCost(unsigned PhysReg) const; - void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); - void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); + void assignVirtToPhysReg(LiveReg&, unsigned PhysReg); + LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg); + LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator, + unsigned Hint); LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); void spillAll(MachineInstr *MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); + void addRetOperands(MachineBasicBlock *MBB); }; char RAFast::ID = 0; } @@ -222,10 +231,10 @@ void RAFast::addKillFlag(const LiveReg &LR) { /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(LiveRegMap::iterator LRI) { - addKillFlag(LRI->second); - const LiveReg &LR = LRI->second; - assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); - PhysRegState[LR.PhysReg] = regFree; + addKillFlag(*LRI); + assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && + "Broken RegState mapping"); + PhysRegState[LRI->PhysReg] = regFree; // Erase from LiveVirtRegs unless we're spilling in bulk. if (!isBulkSpilling) LiveVirtRegs.erase(LRI); @@ -235,7 +244,7 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) { void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); if (LRI != LiveVirtRegs.end()) killVirtReg(LRI); } @@ -245,7 +254,7 @@ void RAFast::killVirtReg(unsigned VirtReg) { void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); spillVirtReg(MI, LRI); } @@ -253,18 +262,18 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { /// spillVirtReg - Do the actual work of spilling. void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator LRI) { - LiveReg &LR = LRI->second; - assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + LiveReg &LR = *LRI; + assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. 
bool SpillKill = LR.LastUse != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI) + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) << " in " << PrintReg(LR.PhysReg, TRI)); - const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); - int FI = getStackSpaceFor(LRI->first, RC); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg); + int FI = getStackSpaceFor(LRI->VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics @@ -272,7 +281,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector &LRIDbgValues = LiveDbgValueMap[LRI->first]; + SmallVector &LRIDbgValues = + LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; const MDNode *MDPtr = @@ -295,8 +305,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } } - // Now this register is spilled there is should not be any DBG_VALUE pointing - // to this register because they are all pointing to spilled value now. + // Now this register is spilled there is should not be any DBG_VALUE + // pointing to this register because they are all pointing to spilled value + // now. LRIDbgValues.clear(); if (SpillKill) LR.LastUse = 0; // Don't kill register again @@ -343,7 +354,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { } // Maybe a superregister is reserved? - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { switch (PhysRegState[Alias]) { case regDisabled: @@ -397,7 +408,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: @@ -435,14 +446,17 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " << PrintReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; - default: - return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + return I->Dirty ? spillDirty : spillClean; + } } // This is a disabled register, add up cost of aliases. DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; - for (const unsigned *AS = TRI->getAliasSet(PhysReg); + for (const uint16_t *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { if (UsedInInstr.test(Alias)) return spillImpossible; @@ -454,10 +468,13 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { break; case regReserved: return spillImpossible; - default: - Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + Cost += I->Dirty ? 
spillDirty : spillClean; break; } + } } return Cost; } @@ -467,17 +484,27 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. /// -void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to " +void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " << PrintReg(PhysReg, TRI) << "\n"); - PhysRegState[PhysReg] = LRE.first; - assert(!LRE.second.PhysReg && "Already assigned a physreg"); - LRE.second.PhysReg = PhysReg; + PhysRegState[PhysReg] = LR.VirtReg; + assert(!LR.PhysReg && "Already assigned a physreg"); + LR.PhysReg = PhysReg; +} + +RAFast::LiveRegMap::iterator +RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; } /// allocVirtReg - Allocate a physical register for VirtReg. -void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { - const unsigned VirtReg = LRE.first; +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, + LiveRegMap::iterator LRI, + unsigned Hint) { + const unsigned VirtReg = LRI->VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -496,7 +523,9 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { if (Cost < spillDirty) { if (Cost) definePhysReg(MI, Hint, regFree); - return assignVirtToPhysReg(LRE, Hint); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, Hint); } } @@ -505,8 +534,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // First try to find a completely free register. for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) - return assignVirtToPhysReg(LRE, PhysReg); + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) { + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; + } } DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " @@ -519,21 +550,25 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { DEBUG(dbgs() << "\tCost: " << Cost << "\n"); DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. - if (Cost == 0) - return assignVirtToPhysReg(LRE, *I); + if (Cost == 0) { + assignVirtToPhysReg(*LRI, *I); + return LRI; + } if (Cost < BestCost) BestReg = *I, BestCost = Cost; } if (BestReg) { definePhysReg(MI, BestReg, regFree); - return assignVirtToPhysReg(LRE, BestReg); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, BestReg); } // Nothing we can do. Report an error and keep going with a bad allocation. MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); - assignVirtToPhysReg(LRE, *AO.begin()); + return assignVirtToPhysReg(VirtReg, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. 
@@ -544,8 +579,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = LRI->second; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && @@ -555,18 +589,18 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); } - allocVirtReg(MI, *LRI, Hint); - } else if (LR.LastUse) { + LRI = allocVirtReg(MI, LRI, Hint); + } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. - if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) - addKillFlag(LR); + if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) + addKillFlag(*LRI); } - assert(LR.PhysReg && "Register not assigned"); - LR.LastUse = MI; - LR.LastOpNum = OpNum; - LR.Dirty = true; - UsedInInstr.set(LR.PhysReg); + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + LRI->Dirty = true; + UsedInInstr.set(LRI->PhysReg); return LRI; } @@ -578,18 +612,17 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, "Not a virtual register"); LiveRegMap::iterator LRI; bool New; - tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = LRI->second; + tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); MachineOperand &MO = MI->getOperand(OpNum); if (New) { - allocVirtReg(MI, *LRI, Hint); + LRI = allocVirtReg(MI, LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " - << PrintReg(LR.PhysReg, TRI) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); + << PrintReg(LRI->PhysReg, TRI) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI); ++NumLoads; - } else if (LR.Dirty) { + } else if (LRI->Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); if (MO.isUse()) @@ -614,10 +647,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); MO.setIsDead(false); } - assert(LR.PhysReg && "Register not assigned"); - LR.LastUse = MI; - LR.LastOpNum = OpNum; - UsedInInstr.set(LR.PhysReg); + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + UsedInInstr.set(LRI->PhysReg); return LRI; } @@ -674,7 +707,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.set(Reg); if (ThroughRegs.count(PhysRegState[Reg])) definePhysReg(MI, Reg, regFree); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { UsedInInstr.set(*AS); if (ThroughRegs.count(PhysRegState[*AS])) definePhysReg(MI, *AS, regFree); @@ -682,7 +715,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } SmallVector PartialDefs; - DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -694,7 +727,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, 
DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " << DefIdx << ".\n"); LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; setPhysReg(MI, i, PhysReg); // Note: we don't update the def operand yet. That would cause the normal // def-scan to attempt spilling. @@ -703,16 +736,25 @@ void RAFast::handleThroughOperands(MachineInstr *MI, // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); - PartialDefs.push_back(LRI->second.PhysReg); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - if (setPhysReg(MI, i, PhysReg)) - VirtDead.push_back(Reg); + PartialDefs.push_back(LRI->PhysReg); } } + DEBUG(dbgs() << "Allocating early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (!MO.isEarlyClobber()) + continue; + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -730,32 +772,66 @@ void RAFast::handleThroughOperands(MachineInstr *MI, UsedInInstr.set(PartialDefs[i]); } +/// addRetOperand - ensure that a return instruction has an operand for each +/// value live out of the function. +/// +/// Things marked both call and return are tail calls; do not do this for them. +/// The tail callee need not take the same registers as input that it produces +/// as output, and there are dependencies for its input registers elsewhere. +/// +/// FIXME: This should be done as part of instruction selection, and this helper +/// should be deleted. Until then, we use custom logic here to create the proper +/// operand under all circumstances. We can't use addRegisterKilled because that +/// doesn't make sense for undefined values. We can't simply avoid calling it +/// for undefined values, because we must ensure that the operand always exists. +void RAFast::addRetOperands(MachineBasicBlock *MBB) { + if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall()) + return; + + MachineInstr *MI = &MBB->back(); + + for (MachineRegisterInfo::liveout_iterator + I = MBB->getParent()->getRegInfo().liveout_begin(), + E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) { + unsigned Reg = *I; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Cannot have a live-out virtual register."); + + bool hasDef = PhysRegState[Reg] == regReserved; + + // Check if this register already has an operand. + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + + unsigned OperReg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(OperReg)) + continue; + + if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) { + // If the ret already has an operand for this physreg or a superset, + // don't duplicate it. 
Set the kill flag if the value is defined. + if (hasDef && !MO.isKill()) + MO.setIsKill(); + Found = true; + break; + } + } + if (!Found) + MI->addOperand(MachineOperand::CreateReg(Reg, + false /*IsDef*/, + true /*IsImp*/, + hasDef/*IsKill*/)); + } +} + void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); - // FIXME: This should probably be added by instruction selection instead? - // If the last instruction in the block is a return, make sure to mark it as - // using all of the live-out values in the function. Things marked both call - // and return are tail calls; do not do this for them. The tail callee need - // not take the same registers as input that it produces as output, and there - // are dependencies for its input registers elsewhere. - if (!MBB->empty() && MBB->back().getDesc().isReturn() && - !MBB->back().getDesc().isCall()) { - MachineInstr *Ret = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - assert(TargetRegisterInfo::isPhysicalRegister(*I) && - "Cannot have a live-out virtual register."); - - // Add live-out registers as implicit uses. - Ret->addRegisterKilled(*I, TRI, true); - } - } - PhysRegState.assign(TRI->getNumRegs(), regDisabled); - assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); + assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB->begin(); @@ -783,25 +859,26 @@ void RAFast::AllocateBasicBlock() { case regReserved: dbgs() << "*"; break; - default: + default: { dbgs() << '=' << PrintReg(PhysRegState[Reg]); - if (LiveVirtRegs[PhysRegState[Reg]].Dirty) + LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + if (I->Dirty) dbgs() << "*"; - assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && - "Bad inverse map"); + assert(I->PhysReg == Reg && "Bad inverse map"); break; } + } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) { - assert(TargetRegisterInfo::isVirtualRegister(i->first) && + assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) && + assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && "Bad map value"); - assert(PhysRegState[i->second.PhysReg] == i->first && - "Bad inverse map"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } }); @@ -815,10 +892,9 @@ void RAFast::AllocateBasicBlock() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveDbgValueMap[Reg].push_back(MI); - LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end()) - setPhysReg(MI, i, LRI->second.PhysReg); + setPhysReg(MI, i, LRI->PhysReg); else { int SS = StackSlotForVirtReg[Reg]; if (SS == -1) { @@ -849,6 +925,7 @@ void RAFast::AllocateBasicBlock() { } } } + LiveDbgValueMap[Reg].push_back(MI); } } // Next instruction. @@ -932,7 +1009,7 @@ void RAFast::AllocateBasicBlock() { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? 
PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); @@ -953,13 +1030,13 @@ void RAFast::AllocateBasicBlock() { // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) UsedInInstr.set(*AS); } } unsigned DefOpEnd = MI->getNumOperands(); - if (MCID.isCall()) { + if (MI->isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. we don't have to wade through @@ -988,7 +1065,7 @@ void RAFast::AllocateBasicBlock() { continue; } LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); - unsigned PhysReg = LRI->second.PhysReg; + unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) { VirtDead.push_back(Reg); CopyDst = 0; // cancel coalescing; @@ -1024,6 +1101,9 @@ void RAFast::AllocateBasicBlock() { MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); + // addRetOperands must run after we've seen all defs in this block. + addRetOperands(MBB); + DEBUG(MBB->dump()); } @@ -1038,12 +1118,16 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); + MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.resize(TRI->getNumRegs()); + assert(!MRI->isSSA() && "regalloc requires leaving SSA"); + // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); @@ -1052,16 +1136,17 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Make sure the set of used physregs is closed under subreg operations. - MRI->closePhysRegsUsed(*TRI); - // Add the clobber lists for all the instructions we skipped earlier. for (SmallPtrSet::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const unsigned *Defs = (*I)->getImplicitDefs()) + if (const uint16_t *Defs = (*I)->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + MRI->clearVirtRegs(); + SkippedInstrs.clear(); StackSlotForVirtReg.clear(); LiveDbgValueMap.clear(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index f54a2c85d100..3f2a617100c3 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -16,7 +16,6 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -29,6 +28,7 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -168,6 +168,19 @@ class RAGreedy : public MachineFunctionPass, } }; + // Register mask interference. 
The current VirtReg is checked for register + // mask interference on entry to selectOrSplit(). If there is no + // interference, UsableRegs is left empty. If there is interference, + // UsableRegs has a bit mask of registers that can be used without register + // mask interference. + BitVector UsableRegs; + + /// clobberedByRegMask - Returns true if PhysReg is not directly usable + /// because of register mask clobbers. + bool clobberedByRegMask(unsigned PhysReg) const { + return !UsableRegs.empty() && !UsableRegs.test(PhysReg); + } + // splitting state. std::auto_ptr SA; std::auto_ptr SE; @@ -248,7 +261,6 @@ class RAGreedy : public MachineFunctionPass, static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); @@ -308,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); @@ -328,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - AU.addRequiredTransitiveID(RegisterCoalescerPassID); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -350,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. - VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (unsigned PhysReg = VRM->getPhys(VirtReg)) { unassign(LIS->getInterval(VirtReg), PhysReg); @@ -424,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio |= (1u << 30); } - Queue.push(std::make_pair(Prio, Reg)); + Queue.push(std::make_pair(Prio, ~Reg)); } LiveInterval *RAGreedy::dequeue() { if (Queue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(Queue.top().second); + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); Queue.pop(); return LI; } @@ -446,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { Order.rewind(); unsigned PhysReg; - while ((PhysReg = Order.next())) + while ((PhysReg = Order.next())) { + if (clobberedByRegMask(PhysReg)) + continue; if (!checkPhysRegInterference(VirtReg, PhysReg)) break; + } if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -457,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. 
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) - if (Order.isHint(Hint)) { + if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); EvictionCost MaxCost(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -532,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); // If there is 10 or more interferences, chances are one is heavier. if (Q.collectInterferingVRegs(10) >= 10) @@ -590,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { @@ -629,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { + if (clobberedByRegMask(PhysReg)) + continue; if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1118,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } --NumCands; GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; } if (GlobalCand.size() <= NumCands) @@ -1172,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1220,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1268,7 +1279,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl &GapWeight) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); - const SmallVectorImpl &Uses = SA->UseSlots; + ArrayRef Uses = SA->getUseSlots(); const unsigned NumGaps = Uses.size()-1; // Start and end points for the interference check. @@ -1280,7 +1291,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. 
- for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { if (!query(const_cast(SA->getParent()), *AI) .checkInterference()) continue; @@ -1292,7 +1303,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1329,7 +1340,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // that the interval is continuous from FirstInstr to LastInstr. We should // make sure that we don't do anything illegal to such an interval, though. - const SmallVectorImpl &Uses = SA->UseSlots; + ArrayRef Uses = SA->getUseSlots(); if (Uses.size() <= 2) return 0; const unsigned NumGaps = Uses.size()-1; @@ -1337,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG({ dbgs() << "tryLocalSplit: "; for (unsigned i = 0, e = Uses.size(); i != e; ++i) - dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << ' ' << Uses[i]; dbgs() << '\n'; }); + // If VirtReg is live across any register mask operands, compute a list of + // gaps with register masks. + SmallVector RegMaskGaps; + if (!UsableRegs.empty()) { + // Get regmask slots for the whole block. + ArrayRef RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); + DEBUG(dbgs() << RMS.size() << " regmasks in block:"); + // Constrain to VirtReg's live range. + unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), + Uses.front().getRegSlot()) - RMS.begin(); + unsigned re = RMS.size(); + for (unsigned i = 0; i != NumGaps && ri != re; ++i) { + // Look for Uses[i] <= RMS <= Uses[i+1]. + assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); + if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) + continue; + // Skip a regmask on the same instruction as the last use. It doesn't + // overlap the live range. + if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) + break; + DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); + RegMaskGaps.push_back(i); + // Advance ri to the next gap. A regmask on one of the uses counts in + // both gaps. + while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) + ++ri; + } + DEBUG(dbgs() << '\n'); + } + // Since we allow local split results to be split again, there is a risk of // creating infinite loops. It is tempting to require that the new live // ranges have less instructions than the original. That would guarantee @@ -1375,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. calcGapWeights(PhysReg, GapWeight); + // Remove any gaps with regmask clobbers. + if (clobberedByRegMask(PhysReg)) + for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) + GapWeight[RegMaskGaps[i]] = HUGE_VALF; + // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. 
@@ -1466,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1553,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { + // Check if VirtReg is live across any calls. + UsableRegs.clear(); + if (LIS->checkRegMaskInterference(VirtReg, UsableRegs)) + DEBUG(dbgs() << "Live across regmasks.\n"); + // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1593,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -1628,7 +1679,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); + IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); @@ -1647,7 +1698,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DebugVars->emitDebugValues(VRM); } - // The pass output is in VirtRegMap. Release all the transient data. + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); releaseMemory(); return true; diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp deleted file mode 100644 index ce3fb90b1126..000000000000 --- a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp +++ /dev/null @@ -1,1543 +0,0 @@ -//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a linear scan register allocator. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" -#include "VirtRegMap.h" -#include "VirtRegRewriter.h" -#include "RegisterClassInfo.h" -#include "Spiller.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include - -using namespace llvm; - -STATISTIC(NumIters , "Number of iterations performed"); -STATISTIC(NumBacktracks, "Number of times we had to backtrack"); -STATISTIC(NumCoalesce, "Number of copies coalesced"); -STATISTIC(NumDowngrade, "Number of registers downgraded"); - -static cl::opt -NewHeuristic("new-spilling-heuristic", - cl::desc("Use new spilling heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt -TrivCoalesceEnds("trivial-coalesce-ends", - cl::desc("Attempt trivial coalescing of interval ends"), - cl::init(false), cl::Hidden); - -static cl::opt -AvoidWAWHazard("avoid-waw-hazard", - cl::desc("Avoid write-write hazards for some register classes"), - cl::init(false), cl::Hidden); - -static RegisterRegAlloc -linearscanRegAlloc("linearscan", "linear scan register allocator", - createLinearScanRegisterAllocator); - -namespace { - // When we allocate a register, add it to a fixed-size queue of - // registers to skip in subsequent allocations. This trades a small - // amount of register pressure and increased spills for flexibility in - // the post-pass scheduler. - // - // Note that in a the number of registers used for reloading spills - // will be one greater than the value of this option. - // - // One big limitation of this is that it doesn't differentiate between - // different register classes. So on x86-64, if there is xmm register - // pressure, it can caused fewer GPRs to be held in the queue. 
- static cl::opt - NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember" - "to skip."), - cl::init(0), - cl::Hidden); - - struct RALinScan : public MachineFunctionPass { - static char ID; - RALinScan() : MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass( - *PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - - // Initialize the queue to record recently-used registers. - if (NumRecentlyUsedRegs > 0) - RecentRegs.resize(NumRecentlyUsedRegs, 0); - RecentNext = RecentRegs.begin(); - avoidWAW_ = 0; - } - - typedef std::pair IntervalPtr; - typedef SmallVector IntervalPtrs; - private: - /// RelatedRegClasses - This structure is built the first time a function is - /// compiled, and keeps track of which register classes have registers that - /// belong to multiple classes or have aliases that are in other classes. - EquivalenceClasses RelatedRegClasses; - DenseMap OneClassForEachPhysReg; - - // NextReloadMap - For each register in the map, it maps to the another - // register which is defined by a reload from the same stack slot and - // both reloads are in the same basic block. - DenseMap NextReloadMap; - - // DowngradedRegs - A set of registers which are being "downgraded", i.e. - // un-favored for allocation. - SmallSet DowngradedRegs; - - // DowngradeMap - A map from virtual registers to physical registers being - // downgraded for the virtual registers. - DenseMap DowngradeMap; - - MachineFunction* mf_; - MachineRegisterInfo* mri_; - const TargetMachine* tm_; - const TargetRegisterInfo* tri_; - const TargetInstrInfo* tii_; - BitVector allocatableRegs_; - BitVector reservedRegs_; - LiveIntervals* li_; - MachineLoopInfo *loopInfo; - RegisterClassInfo RegClassInfo; - - /// handled_ - Intervals are added to the handled_ set in the order of their - /// start value. This is uses for backtracking. - std::vector handled_; - - /// fixed_ - Intervals that correspond to machine registers. - /// - IntervalPtrs fixed_; - - /// active_ - Intervals that are currently being processed, and which have a - /// live range active for the current point. - IntervalPtrs active_; - - /// inactive_ - Intervals that are currently being processed, but which have - /// a hold at the current point. - IntervalPtrs inactive_; - - typedef std::priority_queue, - greater_ptr > IntervalHeap; - IntervalHeap unhandled_; - - /// regUse_ - Tracks register usage. - SmallVector regUse_; - SmallVector regUseBackUp_; - - /// vrm_ - Tracks register assignments. - VirtRegMap* vrm_; - - std::auto_ptr rewriter_; - - std::auto_ptr spiller_; - - // The queue of recently-used registers. - SmallVector RecentRegs; - SmallVector::iterator RecentNext; - - // Last write-after-write register written. - unsigned avoidWAW_; - - // Record that we just picked this register. 
- void recordRecentlyUsed(unsigned reg) { - assert(reg != 0 && "Recently used register is NOREG!"); - if (!RecentRegs.empty()) { - *RecentNext++ = reg; - if (RecentNext == RecentRegs.end()) - RecentNext = RecentRegs.begin(); - } - } - - public: - virtual const char* getPassName() const { - return "Linear Scan Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - // Make sure PassManager knows which analyses to make available - // to coalescing and which analyses coalescing invalidates. - AU.addRequiredTransitiveID(RegisterCoalescerPassID); - AU.addRequired(); - AU.addRequiredID(LiveStacksID); - AU.addPreservedID(LiveStacksID); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(MachineDominatorsID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// runOnMachineFunction - register allocate the whole function - bool runOnMachineFunction(MachineFunction&); - - // Determine if we skip this register due to its being recently used. - bool isRecentlyUsed(unsigned reg) const { - return reg == avoidWAW_ || - std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); - } - - private: - /// linearScan - the linear scan algorithm - void linearScan(); - - /// initIntervalSets - initialize the interval sets. - /// - void initIntervalSets(); - - /// processActiveIntervals - expire old intervals and move non-overlapping - /// ones to the inactive list. - void processActiveIntervals(SlotIndex CurPoint); - - /// processInactiveIntervals - expire old intervals and move overlapping - /// ones to the active list. - void processInactiveIntervals(SlotIndex CurPoint); - - /// hasNextReloadInterval - Return the next liveinterval that's being - /// defined by a reload from the same SS as the specified one. - LiveInterval *hasNextReloadInterval(LiveInterval *cur); - - /// DowngradeRegister - Downgrade a register for allocation. - void DowngradeRegister(LiveInterval *li, unsigned Reg); - - /// UpgradeRegister - Upgrade a register for allocation. - void UpgradeRegister(unsigned Reg); - - /// assignRegOrStackSlotAtInterval - assign a register if one - /// is available, or spill. - void assignRegOrStackSlotAtInterval(LiveInterval* cur); - - void updateSpillWeights(std::vector &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC); - - /// findIntervalsToSpill - Determine the intervals to spill for the - /// specified interval. It's passed the physical registers whose spill - /// weight is the lowest among all the registers whose live intervals - /// conflict with the interval. - void findIntervalsToSpill(LiveInterval *cur, - std::vector > &Candidates, - unsigned NumCands, - SmallVector &SpillIntervals); - - /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try to allocate the definition to the same register as the source, - /// if the register is not defined during the life time of the interval. - /// This eliminates a copy, and is used to coalesce copies which were not - /// coalesced away before allocation either due to dest and src being in - /// different register classes or because the coalescer was overly - /// conservative. 
- unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); - - /// - /// Register usage / availability tracking helpers. - /// - - void initRegUses() { - regUse_.resize(tri_->getNumRegs(), 0); - regUseBackUp_.resize(tri_->getNumRegs(), 0); - } - - void finalizeRegUses() { -#ifndef NDEBUG - // Verify all the registers are "freed". - bool Error = false; - for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { - if (regUse_[i] != 0) { - dbgs() << tri_->getName(i) << " is still in use!\n"; - Error = true; - } - } - if (Error) - llvm_unreachable(0); -#endif - regUse_.clear(); - regUseBackUp_.clear(); - } - - void addRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - ++regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) - ++regUse_[*as]; - } - - void delRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - assert(regUse_[physReg] != 0); - --regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { - assert(regUse_[*as] != 0); - --regUse_[*as]; - } - } - - bool isRegAvail(unsigned physReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - return regUse_[physReg] == 0; - } - - void backUpRegUses() { - regUseBackUp_ = regUse_; - } - - void restoreRegUses() { - regUse_ = regUseBackUp_; - } - - /// - /// Register handling helpers. - /// - - /// getFreePhysReg - return a free physical register for this virtual - /// register interval if we have one, otherwise return 0. - unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector &inactiveCounts, - bool SkipDGRegs); - - /// getFirstNonReservedPhysReg - return the first non-reserved physical - /// register in the register class. - unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - ArrayRef O = RegClassInfo.getOrder(RC); - assert(!O.empty() && "All registers reserved?!"); - return O.front(); - } - - void ComputeRelatedRegClasses(); - - template - void printIntervals(const char* const str, ItTy i, ItTy e) const { - DEBUG({ - if (str) - dbgs() << str << " intervals:\n"; - - for (; i != e; ++i) { - dbgs() << '\t' << *i->first << " -> "; - - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) - reg = vrm_->getPhys(reg); - - dbgs() << tri_->getName(reg) << '\n'; - } - }); - } - }; - char RALinScan::ID = 0; -} - -INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) - -void RALinScan::ComputeRelatedRegClasses() { - // First pass, add all reg classes to the union, and determine at least one - // reg class that each register is in. 
- bool HasAliases = false; - for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), - E = tri_->regclass_end(); RCI != E; ++RCI) { - RelatedRegClasses.insert(*RCI); - for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); - I != E; ++I) { - HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - - const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; - if (PRC) { - // Already processed this register. Just make sure we know that - // multiple register classes share a register. - RelatedRegClasses.unionSets(PRC, *RCI); - } else { - PRC = *RCI; - } - } - } - - // Second pass, now that we know conservatively what register classes each reg - // belongs to, add info about aliases. We don't need to do this for targets - // without register aliases. - if (HasAliases) - for (DenseMap::iterator - I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); - I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { - const TargetRegisterClass *AliasClass = - OneClassForEachPhysReg.lookup(*AS); - if (AliasClass) - RelatedRegClasses.unionSets(I->second, AliasClass); - } -} - -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try -/// allocate the definition the same register as the source register if the -/// register is not defined during live time of the interval. If the interval is -/// killed by a copy, try to use the destination register. This eliminates a -/// copy. This is used to coalesce copies which were not coalesced away before -/// allocation either due to dest and src being in different register classes or -/// because the coalescer was overly conservative. -unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - unsigned Preference = vrm_->getRegAllocPref(cur.reg); - if ((Preference && Preference == Reg) || !cur.containsOneValue()) - return Reg; - - // We cannot handle complicated live ranges. Simple linear stuff only. - if (cur.ranges.size() != 1) - return Reg; - - const LiveRange &range = cur.ranges.front(); - - VNInfo *vni = range.valno; - if (vni->isUnused() || !vni->def.isValid()) - return Reg; - - unsigned CandReg; - { - MachineInstr *CopyMI; - if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) - // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->getOperand(1).getReg(); - else if (TrivCoalesceEnds && - (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && - CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) - // Only used by a copy, try to extend DstReg backwards - CandReg = CopyMI->getOperand(0).getReg(); - else - return Reg; - - // If the target of the copy is a sub-register then don't coalesce. - if(CopyMI->getOperand(0).getSubReg()) - return Reg; - } - - if (TargetRegisterInfo::isVirtualRegister(CandReg)) { - if (!vrm_->isAssignedReg(CandReg)) - return Reg; - CandReg = vrm_->getPhys(CandReg); - } - if (Reg == CandReg) - return Reg; - - const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(CandReg)) - return Reg; - - if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) - return Reg; - - // Try to coalesce. 
- DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, CandReg); - - ++NumCoalesce; - return CandReg; -} - -bool RALinScan::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - allocatableRegs_ = tri_->getAllocatableSet(fn); - reservedRegs_ = tri_->getReservedRegs(fn); - li_ = &getAnalysis(); - loopInfo = &getAnalysis(); - RegClassInfo.runOnMachineFunction(fn); - - // We don't run the coalescer here because we have no reason to - // interact with it. If the coalescer requires interaction, it - // won't do anything. If it doesn't require interaction, we assume - // it was run as a separate pass. - - // If this is the first function compiled, compute the related reg classes. - if (RelatedRegClasses.empty()) - ComputeRelatedRegClasses(); - - // Also resize register usage trackers. - initRegUses(); - - vrm_ = &getAnalysis(); - if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(*this, *mf_, *vrm_)); - - initIntervalSets(); - - linearScan(); - - // Rewrite spill code and update the PhysRegsUsed set. - rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); - - // Write out new DBG_VALUE instructions. - getAnalysis().emitDebugValues(vrm_); - - assert(unhandled_.empty() && "Unhandled live intervals remain!"); - - finalizeRegUses(); - - fixed_.clear(); - active_.clear(); - inactive_.clear(); - handled_.clear(); - NextReloadMap.clear(); - DowngradedRegs.clear(); - DowngradeMap.clear(); - spiller_.reset(0); - - return true; -} - -/// initIntervalSets - initialize the interval sets. -/// -void RALinScan::initIntervalSets() -{ - assert(unhandled_.empty() && fixed_.empty() && - active_.empty() && inactive_.empty() && - "interval sets should be empty on initialization"); - - handled_.reserve(li_->getNumIntervals()); - - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { - mri_->setPhysRegUsed(i->second->reg); - fixed_.push_back(std::make_pair(i->second, i->second->begin())); - } - } else { - if (i->second->empty()) { - assignRegOrStackSlotAtInterval(i->second); - } - else - unhandled_.push(i->second); - } - } -} - -void RALinScan::linearScan() { - // linear scan algorithm - DEBUG({ - dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " - << mf_->getFunction()->getName() << '\n'; - printIntervals("fixed", fixed_.begin(), fixed_.end()); - }); - - while (!unhandled_.empty()) { - // pick the interval with the earliest start point - LiveInterval* cur = unhandled_.top(); - unhandled_.pop(); - ++NumIters; - DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); - - assert(!cur->empty() && "Empty interval in unhandled set."); - - processActiveIntervals(cur->beginIndex()); - processInactiveIntervals(cur->beginIndex()); - - assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); - - // Allocating a virtual register. try to find a free - // physical register or spill an interval (possibly this one) in order to - // assign it one. 
- assignRegOrStackSlotAtInterval(cur); - - DEBUG({ - printIntervals("active", active_.begin(), active_.end()); - printIntervals("inactive", inactive_.begin(), inactive_.end()); - }); - } - - // Expire any remaining active intervals - while (!active_.empty()) { - IntervalPtr &IP = active_.back(); - unsigned reg = IP.first->reg; - DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - active_.pop_back(); - } - - // Expire any remaining inactive intervals - DEBUG({ - for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - dbgs() << "\tinterval " << *i->first << " expired\n"; - }); - inactive_.clear(); - - // Add live-ins to every BB except for entry. Also perform trivial coalescing. - MachineFunction::iterator EntryMBB = mf_->begin(); - SmallVector LiveInMBBs; - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - LiveInterval &cur = *i->second; - unsigned Reg = 0; - bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); - if (isPhys) - Reg = cur.reg; - else if (vrm_->isAssignedReg(cur.reg)) - Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); - if (!Reg) - continue; - // Ignore splited live intervals. - if (!isPhys && vrm_->getPreSplitReg(cur.reg)) - continue; - - for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); - I != E; ++I) { - const LiveRange &LR = *I; - if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { - for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Adding a virtual register to livein set?"); - LiveInMBBs[i]->addLiveIn(Reg); - } - LiveInMBBs.clear(); - } - } - } - - DEBUG(dbgs() << *vrm_); - - // Look for physical registers that end up not being allocated even though - // register allocator had to spill other registers in its register class. - if (!vrm_->FindUnusedRegisters(li_)) - return; -} - -/// processActiveIntervals - expire old intervals and move non-overlapping ones -/// to the inactive list. -void RALinScan::processActiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing active intervals:\n"); - - for (unsigned i = 0, e = active_.size(); i != e; ++i) { - LiveInterval *Interval = active_[i].first; - LiveInterval::iterator IntervalPos = active_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - - } else if (IntervalPos->start > CurPoint) { - // Move inactive intervals to inactive list. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - // add to inactive. - inactive_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. 
- active_[i].second = IntervalPos; - } - } -} - -/// processInactiveIntervals - expire old intervals and move overlapping -/// ones to the active list. -void RALinScan::processInactiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); - - for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { - LiveInterval *Interval = inactive_[i].first; - LiveInterval::iterator IntervalPos = inactive_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else if (IntervalPos->start <= CurPoint) { - // move re-activated intervals in active list - DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - addRegUse(reg); - // add to active - active_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. - inactive_[i].second = IntervalPos; - } - } -} - -/// updateSpillWeights - updates the spill weights of the specifed physical -/// register and its weight. -void RALinScan::updateSpillWeights(std::vector &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC) { - SmallSet Processed; - SmallSet SuperAdded; - SmallVector Supers; - Weights[reg] += weight; - Processed.insert(reg); - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { - Weights[*as] += weight; - Processed.insert(*as); - if (tri_->isSubRegister(*as, reg) && - SuperAdded.insert(*as) && - RC->contains(*as)) { - Supers.push_back(*as); - } - } - - // If the alias is a super-register, and the super-register is in the - // register class we are trying to allocate. Then add the weight to all - // sub-registers of the super-register even if they are not aliases. - // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be chosen - // as a spill candidate since spilling bh doesn't make ebx available. - for (unsigned i = 0, e = Supers.size(); i != e; ++i) { - for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) - if (!Processed.count(*sr)) - Weights[*sr] += weight; - } -} - -static -RALinScan::IntervalPtrs::iterator -FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { - for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); - I != E; ++I) - if (I->first == LI) return I; - return IP.end(); -} - -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, - SlotIndex Point){ - for (unsigned i = 0, e = V.size(); i != e; ++i) { - RALinScan::IntervalPtr &IP = V[i]; - LiveInterval::iterator I = std::upper_bound(IP.first->begin(), - IP.second, Point); - if (I != IP.first->begin()) --I; - IP.second = I; - } -} - -/// getConflictWeight - Return the number of conflicts between cur -/// live interval and defs and uses of Reg weighted by loop depthes. 
-static -float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, - MachineRegisterInfo *mri_, - MachineLoopInfo *loopInfo) { - float Conflicts = 0; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ++I) { - MachineInstr *MI = &*I; - if (cur->liveAt(li_->getInstructionIndex(MI))) { - unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += std::pow(10.0f, (float)loopDepth); - } - } - return Conflicts; -} - -/// findIntervalsToSpill - Determine the intervals to spill for the -/// specified interval. It's passed the physical registers whose spill -/// weight is the lowest among all the registers whose live intervals -/// conflict with the interval. -void RALinScan::findIntervalsToSpill(LiveInterval *cur, - std::vector > &Candidates, - unsigned NumCands, - SmallVector &SpillIntervals) { - // We have figured out the *best* register to spill. But there are other - // registers that are pretty good as well (spill weight within 3%). Spill - // the one that has fewest defs and uses that conflict with cur. - float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; - SmallVector SLIs[3]; - - DEBUG({ - dbgs() << "\tConsidering " << NumCands << " candidates: "; - for (unsigned i = 0; i != NumCands; ++i) - dbgs() << tri_->getName(Candidates[i].first) << " "; - dbgs() << "\n"; - }); - - // Calculate the number of conflicts of each candidate. - for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second-1)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - // Which is the best candidate? - unsigned BestCandidate = 0; - float MinConflicts = Conflicts[0]; - for (unsigned i = 1; i != NumCands; ++i) { - if (Conflicts[i] < MinConflicts) { - BestCandidate = i; - MinConflicts = Conflicts[i]; - } - } - - std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), - std::back_inserter(SpillIntervals)); -} - -namespace { - struct WeightCompare { - private: - const RALinScan &Allocator; - - public: - WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} - - typedef std::pair RegWeightPair; - bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); - } - }; -} - -static bool weightsAreClose(float w1, float w2) { - if (!NewHeuristic) - return false; - - float diff = w1 - w2; - if (diff <= 0.02f) // Within 0.02f - return true; - return (diff / w2) <= 0.05f; // Within 5%. 
-} - -LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { - DenseMap::iterator I = NextReloadMap.find(cur->reg); - if (I == NextReloadMap.end()) - return 0; - return &li_->getInterval(I->second); -} - -void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { - bool isNew = DowngradedRegs.insert(*AS); - (void)isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, *AS)); - } - ++NumDowngrade; -} - -void RALinScan::UpgradeRegister(unsigned Reg) { - if (Reg) { - DowngradedRegs.erase(Reg); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) - DowngradedRegs.erase(*AS); - } -} - -namespace { - struct LISorter { - bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginIndex() < B->beginIndex(); - } - }; -} - -/// assignRegOrStackSlotAtInterval - assign a register if one is available, or -/// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - DEBUG(dbgs() << "\tallocating current interval from " - << RC->getName() << ": "); - - // This is an implicitly defined live interval, just assign any register. - if (cur->empty()) { - unsigned physReg = vrm_->getRegAllocPref(cur->reg); - if (!physReg) - physReg = getFirstNonReservedPhysReg(RC); - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - // Note the register is not really in use. - vrm_->assignVirt2Phys(cur->reg, physReg); - return; - } - - backUpRegUses(); - - std::vector > SpillWeightsToAdd; - SlotIndex StartPosition = cur->beginIndex(); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - // If start of this live interval is defined by a move instruction and its - // source is assigned a physical register that is compatible with the target - // register class, then we should try to assign it the same register. - // This can happen when the move is from a larger register class to a smaller - // one, e.g. X86::mov32to32_. These move instructions are not coalescable. - if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { - VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused() && vni->def.isValid()) { - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - if (CopyMI && CopyMI->isCopy()) { - unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); - unsigned SrcReg = CopyMI->getOperand(1).getReg(); - unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } - } - } - - // For every interval in inactive we overlap with, mark the - // register as not free and update spill weights. - for (IntervalPtrs::const_iterator i = inactive_.begin(), - e = inactive_.end(); i != e; ++i) { - unsigned Reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, - // don't check it. 
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - cur->overlapsFrom(*i->first, i->second-1)) { - Reg = vrm_->getPhys(Reg); - addRegUse(Reg); - SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); - } - } - - // Speculatively check to see if we can get a register right now. If not, - // we know we won't be able to by adding more constraints. If so, we can - // check to see if it is valid. Doing an exhaustive search of the fixed_ list - // is very bad (it contains all callee clobbered registers for any functions - // with a call), so we want to avoid doing that if possible. - unsigned physReg = getFreePhysReg(cur); - unsigned BestPhysReg = physReg; - if (physReg) { - // We got a register. However, if it's in the fixed_ list, we might - // conflict with it. Check to see if we conflict with it or any of its - // aliases. - SmallSet RegAliases; - for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) - RegAliases.insert(*AS); - - bool ConflictsWithFixed = false; - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { - // Okay, this reg is on the fixed list. Check to see if we actually - // conflict. - LiveInterval *I = IP.first; - if (I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - ConflictsWithFixed = true; - break; - } - } - } - } - - // Okay, the register picked by our speculative getFreePhysReg call turned - // out to be in use. Actually add all of the conflicting fixed registers to - // regUse_ so we can do an accurate query. - if (ConflictsWithFixed) { - // For every interval in fixed we overlap with, mark the register as not - // free and update spill weights. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - - const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - unsigned reg = I->reg; - addRegUse(reg); - SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); - } - } - } - - // Using the newly updated regUse_ object, which includes conflicts in the - // future, see if there are any registers available. - physReg = getFreePhysReg(cur); - } - } - - // Restore the physical register tracker, removing information about the - // future. - restoreRegUses(); - - // If we find a free register, we are done: assign this virtual to - // the free physical register and add this interval to the active - // list. - if (physReg) { - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - assert(RC->contains(physReg) && "Invalid candidate"); - vrm_->assignVirt2Phys(cur->reg, physReg); - addRegUse(physReg); - active_.push_back(std::make_pair(cur, cur->begin())); - handled_.push_back(cur); - - // Remember physReg for avoiding a write-after-write hazard in the next - // instruction. - if (AvoidWAWHazard && - tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) - avoidWAW_ = physReg; - - // "Upgrade" the physical register since it has been allocated. 
- UpgradeRegister(physReg); - if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { - // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. - mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); - DowngradeRegister(cur, physReg); - } - return; - } - DEBUG(dbgs() << "no free registers\n"); - - // Compile the spill weights into an array that is better for scanning. - std::vector SpillWeights(tri_->getNumRegs(), 0.0f); - for (std::vector >::iterator - I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, RC); - - // for each interval in active, update spill weights. - for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, RC); - } - - DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); - - // Find a register to spill. - float minWeight = HUGE_VALF; - unsigned minReg = 0; - - bool Found = false; - std::vector > RegsWeights; - ArrayRef Order = RegClassInfo.getOrder(RC); - if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - float regWeight = SpillWeights[reg]; - // Skip recently allocated registers and reserved registers. - if (minWeight > regWeight && !isRecentlyUsed(reg)) - Found = true; - RegsWeights.push_back(std::make_pair(reg, regWeight)); - } - - // If we didn't find a register that is spillable, try aliases? - if (!Found) { - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - // No need to worry about if the alias register size < regsize of RC. - // We are going to spill all registers that alias it anyway. - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) - RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); - } - } - - // Sort all potential spill candidates by weight. - std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); - minReg = RegsWeights[0].first; - minWeight = RegsWeights[0].second; - if (minWeight == HUGE_VALF) { - // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); - if (cur->weight == HUGE_VALF || - li_->getApproximateInstructionCount(*cur) == 0) { - // Spill a physical register around defs and uses. - if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { - // spillPhysRegAroundRegDefsUses may have invalidated iterator stored - // in fixed_. Reset them. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) - IP.second = I->advanceTo(I->begin(), StartPosition); - } - - DowngradedRegs.clear(); - assignRegOrStackSlotAtInterval(cur); - } else { - assert(false && "Ran out of registers during register allocation!"); - report_fatal_error("Ran out of registers during register allocation!"); - } - return; - } - } - - // Find up to 3 registers to consider as spill candidates. - unsigned LastCandidate = RegsWeights.size() >= 3 ? 
3 : 1; - while (LastCandidate > 1) { - if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) - break; - --LastCandidate; - } - - DEBUG({ - dbgs() << "\t\tregister(s) with min weight(s): "; - - for (unsigned i = 0; i != LastCandidate; ++i) - dbgs() << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"; - }); - - // If the current has the minimum weight, we need to spill it and - // add any added intervals back to unhandled, and restart - // linearscan. - if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector added; - LiveRangeEdit LRE(*cur, added); - spiller_->spill(LRE); - - std::sort(added.begin(), added.end(), LISorter()); - if (added.empty()) - return; // Early exit if all spills were folded. - - // Merge added with unhandled. Note that we have already sorted - // intervals returned by addIntervalsForSpills by their starting - // point. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. - MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } - return; - } - - ++NumBacktracks; - - // Push the current interval back to unhandled since we are going - // to re-run at least this iteration. Since we didn't modify it it - // should go back right in the front of the list - unhandled_.push(cur); - - assert(TargetRegisterInfo::isPhysicalRegister(minReg) && - "did not choose a register to spill?"); - - // We spill all intervals aliasing the register with - // minimum weight, rollback to the interval with the earliest - // start point and let the linear scan algorithm run again - SmallVector spillIs; - - // Determine which intervals have to be spilled. - findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); - - // Set of spilled vregs (used later to rollback properly) - SmallSet spilled; - - // The earliest start of a Spilled interval indicates up to where - // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); - SlotIndex earliestStart = spillIs[0]->beginIndex(); - - // Spill live intervals of virtual regs mapped to the physical register we - // want to clear (and its aliases). We only spill those that overlap with the - // current interval as the rest do not affect its allocation. we also keep - // track of the earliest start of all spilled live intervals since this will - // mark our rollback point. 
- SmallVector added; - while (!spillIs.empty()) { - LiveInterval *sli = spillIs.back(); - spillIs.pop_back(); - DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); - if (sli->beginIndex() < earliestStart) - earliestStart = sli->beginIndex(); - LiveRangeEdit LRE(*sli, added, 0, &spillIs); - spiller_->spill(LRE); - spilled.insert(sli->reg); - } - - // Include any added intervals in earliestStart. - for (unsigned i = 0, e = added.size(); i != e; ++i) { - SlotIndex SI = added[i]->beginIndex(); - if (SI < earliestStart) - earliestStart = SI; - } - - DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); - - // Scan handled in reverse order up to the earliest start of a - // spilled live interval and undo each one, restoring the state of - // unhandled. - while (!handled_.empty()) { - LiveInterval* i = handled_.back(); - // If this interval starts before t we are done. - if (!i->empty() && i->beginIndex() < earliestStart) - break; - DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); - handled_.pop_back(); - - // When undoing a live interval allocation we must know if it is active or - // inactive to properly update regUse_ and the VirtRegMap. - IntervalPtrs::iterator it; - if ((it = FindIntervalInVector(active_, i)) != active_.end()) { - active_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - delRegUse(vrm_->getPhys(i->reg)); - vrm_->clearVirt(i->reg); - } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { - inactive_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - vrm_->clearVirt(i->reg); - } else { - assert(TargetRegisterInfo::isVirtualRegister(i->reg) && - "Can only allocate virtual registers!"); - vrm_->clearVirt(i->reg); - unhandled_.push(i); - } - - DenseMap::iterator ii = DowngradeMap.find(i->reg); - if (ii == DowngradeMap.end()) - // It interval has a preference, it must be defined by a copy. Clear the - // preference now since the source interval allocation may have been - // undone as well. - mri_->setRegAllocationHint(i->reg, 0, 0); - else { - UpgradeRegister(ii->second); - } - } - - // Rewind the iterators in the active, inactive, and fixed lists back to the - // point we reverted to. - RevertVectorIteratorsTo(active_, earliestStart); - RevertVectorIteratorsTo(inactive_, earliestStart); - RevertVectorIteratorsTo(fixed_, earliestStart); - - // Scan the rest and undo each interval that expired after t and - // insert it in active (the next iteration of the algorithm will - // put it in inactive if required) - for (unsigned i = 0, e = handled_.size(); i != e; ++i) { - LiveInterval *HI = handled_[i]; - if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginIndex())) { - DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); - active_.push_back(std::make_pair(HI, HI->begin())); - assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); - addRegUse(vrm_->getPhys(HI->reg)); - } - } - - // Merge added with unhandled. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. 
- MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - std::sort(added.begin(), added.end(), LISorter()); - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } -} - -unsigned RALinScan::getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector &inactiveCounts, - bool SkipDGRegs) { - unsigned FreeReg = 0; - unsigned FreeRegInactiveCount = 0; - - std::pair Hint = mri_->getRegAllocationHint(cur->reg); - // Resolve second part of the hint (if possible) given the current allocation. - unsigned physReg = Hint.second; - if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) - physReg = vrm_->getPhys(physReg); - - ArrayRef Order; - if (Hint.first) - Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); - else - Order = RegClassInfo.getOrder(RC); - - assert(!Order.empty() && "No allocatable register in this register class!"); - - // Scan for the first available register. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - // Skip recently allocated registers. - if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - if (FreeReg < inactiveCounts.size()) - FreeRegInactiveCount = inactiveCounts[FreeReg]; - else - FreeRegInactiveCount = 0; - break; - } - } - - // If there are no free regs, or if this reg has the max inactive count, - // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { - // Remember what register we picked so we can skip it next time. - if (FreeReg != 0) recordRecentlyUsed(FreeReg); - return FreeReg; - } - - // Continue scanning the registers, looking for the one with the highest - // inactive count. Alkis found that this reduced register pressure very - // slightly on X86 (in rev 1.94 of this file), though this should probably be - // reevaluated now. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && - (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - FreeRegInactiveCount = inactiveCounts[Reg]; - if (FreeRegInactiveCount == MaxInactiveCount) - break; // We found the one with the max inactive count. - } - } - - // Remember what register we picked so we can skip it next time. 
- recordRecentlyUsed(FreeReg); - - return FreeReg; -} - -/// getFreePhysReg - return a free physical register for this virtual register -/// interval if we have one, otherwise return 0. -unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { - SmallVector inactiveCounts; - unsigned MaxInactiveCount = 0; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - - // If this is not in a related reg class to the register we're allocating, - // don't check it. - const TargetRegisterClass *RegRC = mri_->getRegClass(reg); - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { - reg = vrm_->getPhys(reg); - if (inactiveCounts.size() <= reg) - inactiveCounts.resize(reg+1); - ++inactiveCounts[reg]; - MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); - } - } - - // If copy coalescer has assigned a "preferred" register, check if it's - // available first. - unsigned Preference = vrm_->getRegAllocPref(cur->reg); - if (Preference) { - DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); - if (isRegAvail(Preference) && - RC->contains(Preference)) - return Preference; - } - - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); -} - -FunctionPass* llvm::createLinearScanRegisterAllocator() { - return new RALinScan(); -} diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 0d2cf2d6184c..a2846145bc7e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -32,14 +32,17 @@ #define DEBUG_TYPE "regalloc" #include "RenderMachineFunction.h" -#include "Splitter.h" +#include "Spiller.h" #include "VirtRegMap.h" -#include "VirtRegRewriter.h" #include "RegisterCoalescer.h" +#include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/RegAllocPBQP.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -54,6 +57,7 @@ #include #include #include +#include #include using namespace llvm; @@ -67,10 +71,12 @@ pbqpCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); +#ifndef NDEBUG static cl::opt -pbqpPreSplitting("pbqp-pre-splitting", - cl::desc("Pre-split before PBQP register allocation."), - cl::init(false), cl::Hidden); +pbqpDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif namespace { @@ -88,11 +94,9 @@ class RegAllocPBQP : public MachineFunctionPass { : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } @@ -132,6 +136,7 @@ class RegAllocPBQP : public MachineFunctionPass { MachineRegisterInfo *mri; RenderMachineFunction *rmf; + std::auto_ptr spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -141,10 +146,6 @@ class RegAllocPBQP : public MachineFunctionPass { /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - /// \brief Adds a stack interval if the given live interval has been - /// spilled. Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, @@ -170,7 +171,7 @@ PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; - + } const PBQPRAProblem::AllowedSet& @@ -195,9 +196,9 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector LIVector; - + ArrayRef regMaskSlots = lis->getRegMaskSlots(); MachineRegisterInfo *mri = &mf->getRegInfo(); - const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); std::auto_ptr p(new PBQPRAProblem()); PBQP::Graph &g = p->getGraph(); @@ -214,7 +215,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, BitVector reservedRegs = tri->getReservedRegs(*mf); - // Iterate over vregs. + // Iterate over vregs. for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); vregItr != vregEnd; ++vregItr) { unsigned vreg = *vregItr; @@ -224,7 +225,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, // Compute an initial allowed set for the current vreg. typedef std::vector VRAllowed; VRAllowed vrAllowed; - ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); + ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; if (!reservedRegs.test(preg)) { @@ -232,7 +233,9 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, } } - // Remove any physical registers which overlap. + RegSet overlappingPRegs; + + // Record physical registers whose ranges overlap. for (RegSet::const_iterator pregItr = pregs.begin(), pregEnd = pregs.end(); pregItr != pregEnd; ++pregItr) { @@ -243,9 +246,41 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, continue; } - if (!vregLI->overlaps(*pregLI)) { - continue; + if (vregLI->overlaps(*pregLI)) + overlappingPRegs.insert(preg); + } + + // Record any overlaps with regmask operands. 
+ BitVector regMaskOverlaps(tri->getNumRegs()); + for (ArrayRef::iterator rmItr = regMaskSlots.begin(), + rmEnd = regMaskSlots.end(); + rmItr != rmEnd; ++rmItr) { + SlotIndex rmIdx = *rmItr; + if (vregLI->liveAt(rmIdx)) { + MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); + const uint32_t* regMask = 0; + for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), + mopEnd = rmMI->operands_end(); + mopItr != mopEnd; ++mopItr) { + if (mopItr->isRegMask()) { + regMask = mopItr->getRegMask(); + break; + } + } + assert(regMask != 0 && "Couldn't find register mask."); + regMaskOverlaps.setBitsNotInMask(regMask); } + } + + for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { + if (regMaskOverlaps.test(preg)) + overlappingPRegs.insert(preg); + } + + for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), + pregEnd = overlappingPRegs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; // Remove the register from the allowed set. VRAllowed::iterator eraseItr = @@ -256,7 +291,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, } // Also remove any aliases. - const unsigned *aliasItr = tri->getAliasSet(preg); + const uint16_t *aliasItr = tri->getAliasSet(preg); if (aliasItr != 0) { for (; *aliasItr != 0; ++aliasItr) { VRAllowed::iterator eraseItr = @@ -270,7 +305,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, } // Construct the node. - PBQP::Graph::NodeItr node = + PBQP::Graph::NodeItr node = g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); // Record the mapping and allowed set in the problem. @@ -371,7 +406,7 @@ std::auto_ptr PBQPBuilderWithCoalescing::build( const float copyFactor = 0.5; // Cost of copy relative to load. Current // value plucked randomly out of the air. - + PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, loopInfo->getLoopDepth(mbb)); @@ -382,7 +417,7 @@ std::auto_ptr PBQPBuilderWithCoalescing::build( } const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); - unsigned pregOpt = 0; + unsigned pregOpt = 0; while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { ++pregOpt; } @@ -407,7 +442,7 @@ std::auto_ptr PBQPBuilderWithCoalescing::build( std::swap(allowed1, allowed2); } } - + addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, cBenefit); } @@ -439,27 +474,29 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce( if (preg1 == preg2) { costMat[i + 1][j + 1] += -benefit; - } + } } } } void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesCFG(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); au.addRequired(); //au.addRequiredID(SplitCriticalEdgesID); - au.addRequiredID(RegisterCoalescerPassID); if (customPassID) au.addRequiredID(*customPassID); au.addRequired(); au.addRequired(); au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); - if (pbqpPreSplitting) - au.addRequired(); au.addRequired(); au.addRequired(); MachineFunctionPass::getAnalysisUsage(au); @@ -488,29 +525,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() { } } -void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, - MachineRegisterInfo* mri) { - int stackSlot = vrm->getStackSlot(spilled->reg); - - if (stackSlot == VirtRegMap::NO_STACK_SLOT) { - return; - } - - const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); - LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); - - VNInfo *vni; - if (stackInterval.getNumValNums() != 0) { - vni = 
stackInterval.getValNumInfo(0); - } else { - vni = stackInterval.getNextValue( - SlotIndex(), 0, lss->getVNInfoAllocator()); - } - - LiveInterval &rhsInterval = lis->getInterval(spilled->reg); - stackInterval.MergeRangesInAsValue(rhsInterval, vni); -} - bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Solution &solution) { // Set to true if we have any spills @@ -529,28 +543,22 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, unsigned alloc = solution.getSelection(node); if (problem.isPRegOption(vreg, alloc)) { - unsigned preg = problem.getPRegForOption(vreg, alloc); + unsigned preg = problem.getPRegForOption(vreg, alloc); DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); assert(preg != 0 && "Invalid preg selected."); - vrm->assignVirt2Phys(vreg, preg); + vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - const LiveInterval* spillInterval = &lis->getInterval(vreg); - double oldWeight = spillInterval->weight; - rmf->rememberUseDefs(spillInterval); - std::vector newSpills = - lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); - addStackInterval(spillInterval, mri); - rmf->rememberSpills(spillInterval, newSpills); + SmallVector newSpills; + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); + spiller->spill(LRE); - (void) oldWeight; DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " - << oldWeight << ", New vregs: "); + << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. - for (std::vector::const_iterator - itr = newSpills.begin(), end = newSpills.end(); + for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); DEBUG(dbgs() << (*itr)->reg << " "); @@ -560,9 +568,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !newSpills.empty(); + anotherRoundNeeded |= !LRE.empty(); } else { - assert(false && "Unknown allocation option."); + llvm_unreachable("Unknown allocation option."); } } @@ -642,7 +650,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { tm = &mf->getTarget(); tri = tm->getRegisterInfo(); tii = tm->getInstrInfo(); - mri = &mf->getRegInfo(); + mri = &mf->getRegInfo(); lis = &getAnalysis(); lss = &getAnalysis(); @@ -650,7 +658,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rmf = &getAnalysis(); vrm = &getAnalysis(); + spiller.reset(createInlineSpiller(*this, MF, *vrm)); + mri->freezeReservedRegs(MF); DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); @@ -666,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); + const Function* func = mf->getFunction(); + std::string fqn = + func->getParent()->getModuleIdentifier() + "." + + func->getName().str(); + (void)fqn; + // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { @@ -677,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::auto_ptr problem = builder->build(mf, lis, loopInfo, vregsToAlloc); + +#ifndef NDEBUG + if (pbqpDumpGraphs) { + std::ostringstream rs; + rs << round; + std::string graphFileName(fqn + "." 
+ rs.str() + ".pbqpgraph"); + std::string tmp; + raw_fd_ostream os(graphFileName.c_str(), tmp); + DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" + << graphFileName << "\"\n"); + problem->getGraph().dump(os); + } +#endif + PBQP::Solution solution = PBQP::HeuristicSolver::solve( problem->getGraph()); @@ -698,9 +728,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter - std::auto_ptr rewriter(createVirtRegRewriter()); + vrm->rewrite(lis->getSlotIndexes()); - rewriter->runOnMachineFunction(*mf, *vrm, lis); + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + vrm->clearAllVirt(); + mri->clearVirtRegs(); return true; } diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 786d279c2b8c..17165fa72665 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -18,12 +18,16 @@ #include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" - +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt +StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), + cl::desc("Limit all regclasses to N registers")); + RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) {} @@ -39,14 +43,14 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Does this MF have different CSRs? - const unsigned *CSR = TRI->getCalleeSavedRegs(MF); + const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); if (Update || CSR != CalleeSaved) { // Build a CSRNum map. Every CSR alias gets an entry pointing to the last // overlapping CSR. CSRNum.clear(); CSRNum.resize(TRI->getNumRegs(), 0); for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (const unsigned *AS = TRI->getOverlaps(Reg); + for (const uint16_t *AS = TRI->getOverlaps(Reg); unsigned Alias = *AS; ++AS) CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... Update = true; @@ -81,7 +85,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. - ArrayRef RawOrder = RC->getRawAllocationOrder(*MF); + ArrayRef RawOrder = RC->getRawAllocationOrder(*MF); for (unsigned i = 0; i != RawOrder.size(); ++i) { unsigned PhysReg = RawOrder[i]; // Remove reserved registers from the allocation order. @@ -99,6 +103,10 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { // CSR aliases go after the volatile registers, preserve the target's order. std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + // Register allocator stress test. Clip register class to N registers. + if (StressRA && RCI.NumRegs > StressRA) + RCI.NumRegs = StressRA; + // Check if RC is a proper sub-class. 
if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC)) if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.h b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h index 2c1407096cd7..400e1f48ce54 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.h +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.h @@ -49,7 +49,7 @@ class RegisterClassInfo { // Callee saved registers of last MF. Assumed to be valid until the next // runOnFunction() call. - const unsigned *CalleeSaved; + const uint16_t *CalleeSaved; // Map register number to CalleeSaved index + 1; SmallVector CSRNum; diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index 9b414d6212c7..75f88cafdf01 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regcoalescing" +#define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" #include "LiveDebugVariables.h" #include "RegisterClassInfo.h" @@ -169,10 +169,6 @@ namespace { /// it as well. bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); - /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the - /// VNInfo copy flag for DstReg and all aliases. - void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); - /// markAsJoined - Remember that CopyMI has already been joined. void markAsJoined(MachineInstr *CopyMI); @@ -197,7 +193,7 @@ namespace { }; } /// end anonymous namespace -char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID; +char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -205,9 +201,6 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(PHIElimination) -INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) @@ -379,9 +372,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(StrongPHIEliminationID); - AU.addPreservedID(PHIEliminationID); - AU.addPreservedID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -423,7 +413,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -434,40 +424,19 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. 
- if (!BValNo->isDefByCopy()) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + if (BValNo->def != CopyIdx) return false; // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); // The live range might not exist after fun with physreg coalescing. if (ALR == IntA.end()) return false; VNInfo *AValNo = ALR->valno; - // If it's re-defined by an early clobber somewhere in the live range, then - // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. - // See PR3149: - // 172 %ECX = MOV32rr %reg1039 - // 180 INLINEASM , 10, %EAX, 14, %ECX, 9, - // %EAX, - // 36, , 1, %reg0, 0, 9, %ECX, 36, , 1, %reg0, 0 - // 188 %EAX = MOV32rr %EAX - // 196 %ECX = MOV32rr %ECX - // 204 %ECX = MOV32rr %ECX - // 212 %EAX = MOV32rr %EAX - // 220 %EAX = MOV32rr %EAX - // 228 %reg1039 = MOV32rr %ECX - // The early clobber operand ties ECX input to the ECX def. - // - // The live interval of ECX is represented as this: - // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) - // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->hasRedefByEC()) - return false; // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. - if (!CP.isCoalescable(AValNo->getCopy())) + MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); + if (!CP.isCoalescable(ACopyMI)) return false; // Get the LiveRange in IntB that this value number starts with. @@ -492,7 +461,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // of its aliases is overlapping the live interval of the virtual register. // If so, do not coalesce. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { DEBUG({ dbgs() << "\t\tInterfere with alias "; @@ -511,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the valnum with the new defining // instruction #. - BValNo->def = FillerStart; - BValNo->setCopy(0); + BValNo->def = FillerStart; // Okay, we can merge them. We need to insert a new liverange: // [ValLR.end, BLR.begin) of either value number, then we merge the @@ -522,12 +490,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // If the IntB live range is assigned to a physical register, and if that // physreg has sub-registers, update their live intervals as well. 
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { + for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { if (!LIS->hasInterval(*SR)) continue; LiveInterval &SRLI = LIS->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, + SRLI.getNextValue(FillerStart, LIS->getVNInfoAllocator()))); } } @@ -554,9 +522,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, ValLREndInst->getOperand(UIdx).setIsKill(false); } - // If the copy instruction was killing the destination register before the - // merge, find the last use and trim the live range. That will also add the - // isKill marker. + // Rewrite the copy. If the copy instruction was killing the destination + // register before the merge, find the last use and trim the live range. That + // will also add the isKill marker. + CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), + *TRI); if (ALR->end == CopyIdx) LIS->shrinkToUses(&IntA); @@ -625,7 +595,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!LIS->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -635,13 +605,13 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || !BValNo->isDefByCopy()) + if (!BValNo || BValNo->def != CopyIdx) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); // If other defs can reach uses of this def, then it's not safe to perform @@ -651,8 +621,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) return false; - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isCommutable()) + if (!DefMI->isCommutable()) return false; // If DefMI is a two-address instruction then commuting it will change the // destination register. @@ -684,7 +653,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Abort if the aliases of IntB.reg have values that are not simply the // clobbers from the superreg. 
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) + for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) if (LIS->hasInterval(*AS) && HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) return false; @@ -718,7 +687,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); + MachineBasicBlock::iterator Pos = DefMI; + MBB->insert(Pos, NewMI); MBB->erase(DefMI); } unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); @@ -747,7 +717,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex(); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -765,7 +735,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // This copy will become a noop. If it's defining a new val#, merge it into // BValNo. - SlotIndex DefIdx = UseIdx.getDefIndex(); + SlotIndex DefIdx = UseIdx.getRegSlot(); VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); if (!DVNI) continue; @@ -779,7 +749,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // is updated. VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; - ValNo->setCopy(0); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -799,7 +768,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, bool preserveSrcInt, unsigned DstReg, MachineInstr *CopyMI) { - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -809,14 +778,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (!DefMI) return false; assert(DefMI && "Defining instruction disappeared"); - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isAsCheapAsAMove()) + if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; if (!DefMI->isSafeToMove(TII, AA, SawStore)) return false; + const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; if (!DefMI->isImplicitDef()) { @@ -831,27 +800,52 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - RemoveCopyFlag(DstReg, CopyMI); - MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); + // NewMI may have dead implicit defs (E.g. EFLAGS for MOVr0 on X86). + // We need to remember these so we can add intervals once we insert + // NewMI into SlotIndexes. 
+ SmallVector NewMIImplDefs; + for (unsigned i = NewMI->getDesc().getNumOperands(), + e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (MO.isReg()) { + assert(MO.isDef() && MO.isImplicit() && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + NewMIImplDefs.push_back(MO.getReg()); + } + } + // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. for (unsigned i = CopyMI->getDesc().getNumOperands(), e = CopyMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - if (MO.isDef()) - RemoveCopyFlag(MO.getReg(), CopyMI); + if (MO.isReg()) { + assert(MO.isImplicit() && "No explicit operands after implict operands."); + // Discard VReg implicit defs. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + NewMI->addOperand(MO); + } + } } - NewMI->copyImplicitOps(CopyMI); LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { + unsigned reg = NewMIImplDefs[i]; + LiveInterval &li = LIS->getInterval(reg); + VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), + LIS->getVNInfoAllocator()); + LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); + li.addRange(lr); + } + CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); @@ -887,7 +881,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, DstInt = SrcInt; SrcInt = 0; - VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); DstInt->removeValNo(DeadVNI); @@ -941,13 +935,10 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { SmallVector Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); - bool Kills = false, Deads = false; // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); - Kills |= MO.isKill(); - Deads |= MO.isDead(); // Make sure we don't create read-modify-write defs accidentally. We // assume here that a SrcReg def cannot be joined into a live DstReg. If @@ -967,19 +958,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { if (JoinedCopies.count(UseMI)) continue; - if (SubIdx) { - // If UseMI was a simple SrcReg def, make sure we didn't turn it into a - // read-modify-write of DstReg. - if (Deads) - UseMI->addRegisterDead(DstReg, TRI); - else if (!Reads && Writes) - UseMI->addRegisterDefined(DstReg, TRI); - - // Kill flags apply to the whole physical register. - if (DstIsPhys && Kills) - UseMI->addRegisterKilled(DstReg, TRI); - } - DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) @@ -996,7 +974,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, const TargetRegisterInfo *TRI) { if (li.empty()) { if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { + for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { if (!LIS->hasInterval(*SR)) continue; LiveInterval &sli = LIS->getInterval(*SR); @@ -1013,7 +991,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, /// the val# it defines. 
If the live interval becomes empty, remove it as well. bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -1021,27 +999,6 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, return removeIntervalIfEmpty(li, LIS, TRI); } -void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, - const MachineInstr *CopyMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); - if (LIS->hasInterval(DstReg)) { - LiveInterval &LI = LIS->getInterval(DstReg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } - if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) - return; - for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) { - if (!LIS->hasInterval(*AS)) - continue; - LiveInterval &LI = LIS->getInterval(*AS); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->def == DefIdx) - LR->valno->setCopy(0); - } -} - /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. /// We need to be careful about coalescing a source physical register with a /// virtual register. Once the coalescing is done, it cannot be broken and these @@ -1279,7 +1236,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { } } - // SrcReg is guarateed to be the register whose live interval that is + // SrcReg is guaranteed to be the register whose live interval that is // being merged. LIS->removeInterval(CP.getSrcReg()); @@ -1368,9 +1325,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, // FIXME: This is very conservative. For example, we don't handle // physical registers. - MachineInstr *MI = VNI->getCopy(); + MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) return false; unsigned Dst = MI->getOperand(0).getReg(); @@ -1388,11 +1345,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, assert(Dst == A); VNInfo *Other = LR->valno; - if (!Other->isDefByCopy()) - return false; - const MachineInstr *OtherMI = Other->getCopy(); + const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); - if (!OtherMI->isFullCopy()) + if (!OtherMI || !OtherMI->isFullCopy()) return false; unsigned OtherDst = OtherMI->getOperand(0).getReg(); @@ -1431,7 +1386,44 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // than the full interfeence check below. We allow overlapping live ranges // only when one is a copy of the other. if (CP.isPhys()) { - for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ + // Optimization for reserved registers like ESP. + // We can only merge with a reserved physreg if RHS has a single value that + // is a copy of CP.DstReg(). The live range of the reserved register will + // look like a set of dead defs - we don't properly track the live range of + // reserved registers. + if (RegClassInfo.isReserved(CP.getDstReg())) { + assert(CP.isFlipped() && RHS.containsOneValue() && + "Invalid join with reserved register"); + // Deny any overlapping intervals. This depends on all the reserved + // register live ranges to look like dead defs. 
+ for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { + if (!LIS->hasInterval(*AS)) { + // Make sure at least DstReg itself exists before attempting a join. + if (*AS == CP.getDstReg()) + LIS->getOrCreateInterval(CP.getDstReg()); + continue; + } + if (RHS.overlaps(LIS->getInterval(*AS))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); + return false; + } + } + // Skip any value computations, we are not adding new values to the + // reserved register. Also skip merging the live ranges, the reserved + // register live range doesn't need to be accurate as long as all the + // defs are there. + return true; + } + + // Check if a register mask clobbers DstReg. + BitVector UsableRegs; + if (LIS->checkRegMaskInterference(RHS, UsableRegs) && + !UsableRegs.test(CP.getDstReg())) { + DEBUG(dbgs() << "\t\tRegister mask interference.\n"); + return false; + } + + for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ if (!LIS->hasInterval(*AS)) continue; const LiveInterval &LHS = LIS->getInterval(*AS); @@ -1485,12 +1477,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike()) // Src not defined by a copy? continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; // Figure out the value # from the RHS. LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); @@ -1499,7 +1491,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. - MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; @@ -1512,12 +1503,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike()) // Src not defined by a copy? continue; - - // Never join with a register that has EarlyClobber redefs. - if (VNI->hasRedefByEC()) - return false; // Figure out the value # from the LHS. LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); @@ -1526,7 +1517,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. - MachineInstr *MI = VNI->getCopy(); if (!CP.isCoalescable(MI) && !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) continue; @@ -1600,10 +1590,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. 
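The new physreg join path above also rejects a join when a register mask operand clobbers CP.getDstReg(), using checkRegMaskInterference() to collect the registers the mask preserves. A register mask is conventionally a packed bit vector with one bit per physical register, where a set bit means the register survives the masked operation (typically a call). Below is a minimal independent sketch of that per-register test; regIsPreserved is a hypothetical helper, not part of the patch.

#include <cstdint>

// Returns true if 'mask' preserves physical register 'reg'.
// Bit layout assumed here: bit (reg % 32) of word (reg / 32), set = preserved.
bool regIsPreserved(const uint32_t *mask, unsigned reg) {
  return (mask[reg / 32] >> (reg % 32)) & 1u;
}

int main() {
  uint32_t mask[2] = {0, 0};
  mask[0] |= 1u << 7;                    // pretend register 7 is preserved
  bool clobbered = !regIsPreserved(mask, 9);
  return clobbered ? 0 : 1;              // register 9 is clobbered by the mask
}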
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) - return false; } if (I->end < J->end) @@ -1905,8 +1891,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MO.getReg(); if (!Reg) continue; + DeadDefs.push_back(Reg); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - DeadDefs.push_back(Reg); // Remat may also enable register class inflation. if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) InflateRegs.push_back(Reg); @@ -1936,7 +1922,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; @@ -1950,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // remain alive. if (!TargetRegisterInfo::isPhysicalRegister(reg)) continue; - for (const unsigned *SR = TRI->getSubRegisters(reg); + for (const uint16_t *SR = TRI->getSubRegisters(reg); unsigned S = *SR; ++SR) if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) MI->addRegisterDefined(S, TRI); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h index 472c48377fef..310b933cab9b 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h @@ -1,4 +1,4 @@ -//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// +//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the abstract interface for register coalescers, +// This file contains the abstract interface for register coalescers, // allowing them to interact with and query register allocators. // //===----------------------------------------------------------------------===// @@ -47,7 +47,7 @@ namespace llvm { /// CrossClass - True when both regs are virtual, and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the oriignal + /// Flipped - True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index ca02aa1b8143..03bd82e225dc 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -37,7 +37,7 @@ using namespace llvm; void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) RegsAvailable.reset(SubReg); } @@ -45,7 +45,7 @@ void RegScavenger::setUsed(unsigned Reg) { bool RegScavenger::isAliasUsed(unsigned Reg) const { if (isUsed(Reg)) return true; - for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R) if (isUsed(*R)) return true; return false; @@ -59,9 +59,6 @@ void RegScavenger::initRegState() { // All registers started out unused. RegsAvailable.set(); - // Reserved registers are always used. 
- RegsAvailable ^= ReservedRegs; - if (!MBB) return; @@ -86,17 +83,24 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); + KillRegs.resize(NumPhysRegs); + DefRegs.resize(NumPhysRegs); // Create reserved registers bitvector. ReservedRegs = TRI->getReservedRegs(MF); // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); - const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); if (CSRegs != NULL) for (unsigned i = 0; CSRegs[i]; ++i) CalleeSavedRegs.set(CSRegs[i]); @@ -110,13 +114,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(Reg); - for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) - BV.set(*R); -} - -void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { - BV.set(Reg); - for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) BV.set(*R); } @@ -148,12 +146,12 @@ void RegScavenger::forward() { // predicated, conservatively assume "kill" markers do not actually kill the // register. Similarly ignores "dead" markers. bool isPred = TII->isPredicated(MI); - BitVector EarlyClobberRegs(NumPhysRegs); - BitVector KillRegs(NumPhysRegs); - BitVector DefRegs(NumPhysRegs); - BitVector DeadRegs(NumPhysRegs); + KillRegs.reset(); + DefRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -164,21 +162,19 @@ void RegScavenger::forward() { // Ignore undef uses. if (MO.isUndef()) continue; - // Two-address operands implicitly kill. - if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) + if (!isPred && MO.isKill()) addRegWithSubRegs(KillRegs, Reg); } else { assert(MO.isDef()); if (!isPred && MO.isDead()) - addRegWithSubRegs(DeadRegs, Reg); + addRegWithSubRegs(KillRegs, Reg); else addRegWithSubRegs(DefRegs, Reg); - if (MO.isEarlyClobber()) - addRegWithAliases(EarlyClobberRegs, Reg); } } // Verify uses and defs. +#ifndef NDEBUG for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) @@ -199,17 +195,18 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. 
bool SubUsed = false; - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) if (isUsed(SubReg)) { SubUsed = true; break; } - assert(SubUsed && "Using an undefined register!"); + if (!SubUsed) { + MBB->getParent()->verify(NULL, "In Register Scavenger"); + llvm_unreachable("Using an undefined register!"); + } (void)SubUsed; } - assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && - "Using an early clobbered register!"); } else { assert(MO.isDef()); #if 0 @@ -221,18 +218,20 @@ void RegScavenger::forward() { #endif } } +#endif // NDEBUG // Commit the changes. setUnused(KillRegs); - setUnused(DeadRegs); setUsed(DefRegs); } void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { + used = RegsAvailable; + used.flip(); if (includeReserved) - used = ~RegsAvailable; + used |= ReservedRegs; else - used = ~RegsAvailable & ~ReservedRegs; + used.reset(ReservedRegs); } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { @@ -286,6 +285,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, // Remove any candidates touched by instruction. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) continue; if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { @@ -296,7 +297,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, continue; } Candidates.reset(MO.getReg()); - for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++) Candidates.reset(*R); } // If we're not in a virtual reg's live range, this is a valid @@ -347,9 +348,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // RegsAvailable, as RegsAvailable does not take aliases into account. // That's what getRegsAvailable() is for. BitVector Available = getRegsAvailable(RC); - - if ((Candidates & Available).any()) - Candidates &= Available; + Available &= Candidates; + if (Available.any()) + Candidates = Available; // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp index 8b02ec44273a..6020908d9112 100644 --- a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===// +//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===// // // The LLVM Compiler Infrastructure // @@ -560,12 +560,13 @@ namespace llvm { // For uses/defs recorded use/def indexes override current liveness and // instruction operands (Only for the interval which records the indexes). - if (i.isUse() || i.isDef()) { + // FIXME: This is all wrong, uses and defs share the same slots. 
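The scavengeRegister() change near the end of the RegisterScavenging.cpp hunk replaces the (Candidates & Available).any() test with an explicit intersection: prefer candidate registers that are currently free, and only fall back to the full candidate set (so one can be spilled) when none are. A small sketch of that selection policy using std::bitset; preferAvailable is an illustrative name, not an LLVM API.

#include <bitset>
#include <cstddef>

// Narrow 'candidates' to those that are also 'available', unless that would
// leave nothing to choose from; in that case keep the original set so the
// caller can still pick a register to spill.
template <std::size_t N>
std::bitset<N> preferAvailable(std::bitset<N> candidates,
                               const std::bitset<N> &available) {
  std::bitset<N> both = candidates & available;
  return both.any() ? both : candidates;
}

int main() {
  std::bitset<8> cand("00001111"), avail("11110000");
  // No candidate is currently free, so the full candidate set survives.
  return preferAvailable(cand, avail) == cand ? 0 : 1;
}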
+ if (i.isEarlyClobber() || i.isRegister()) { UseDefs::const_iterator udItr = useDefs.find(li); if (udItr != useDefs.end()) { const SlotSet &slotSet = udItr->second; if (slotSet.count(i)) { - if (i.isUse()) { + if (i.isEarlyClobber()) { return Used; } // else @@ -586,9 +587,9 @@ namespace llvm { return AliveStack; } } else { - if (i.isDef() && mi->definesRegister(li->reg, tri)) { + if (i.isRegister() && mi->definesRegister(li->reg, tri)) { return Defined; - } else if (i.isUse() && mi->readsRegister(li->reg)) { + } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { return Used; } else { if (vrm == 0 || @@ -804,7 +805,7 @@ namespace llvm { os << indent + s(2) << "\n"; // Render the code column. - if (i.isLoad()) { + if (i.isBlock()) { MachineBasicBlock *mbb = sis->getMBBFromIndex(i); mi = sis->getInstructionFromIndex(i); @@ -823,7 +824,7 @@ namespace llvm { } os << indent + s(4) << "\n"; } else { - i = i.getStoreIndex(); // <- Will be incremented to the next index. + i = i.getDeadSlot(); // <- Will be incremented to the next index. continue; } } @@ -952,10 +953,10 @@ namespace llvm { rItr != rEnd; ++rItr) { const MachineInstr *mi = &*rItr; if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); } if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); } } } diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp index 1e9b5c89f172..8fd64265fda6 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -31,6 +31,8 @@ static cl::opt StressSchedOpt( cl::desc("Stress test instruction scheduling")); #endif +void SchedulingPriorityQueue::anchor() { } + ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), @@ -44,44 +46,19 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} +/// Clear the DAG state (e.g. between scheduling regions). +void ScheduleDAG::clearDAG() { + SUnits.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); +} + /// getInstrDesc helper to handle SDNodes. const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { if (!Node || !Node->isMachineOpcode()) return NULL; return &TII->get(Node->getMachineOpcode()); } -/// dump - dump the schedule. -void ScheduleDAG::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(this); - else - dbgs() << "**** NOOP ****\n"; - } -} - - -/// Run - perform scheduling. -/// -void ScheduleDAG::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { - BB = bb; - InsertPos = insertPos; - - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - - Schedule(); - - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); -} - /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. 
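The RenderMachineFunction changes above swap the old isLoad/isUse/isDef/getStoreIndex accessors for isBlock/isEarlyClobber/isRegister/getDeadSlot, which suggests a model in which each instruction number carries a small fixed set of slots rather than separate load/use/def/store indexes. A toy sketch of such an encoding, assuming four slots per instruction purely for illustration; slotIndex and slotKind are hypothetical helpers.

#include <cstdio>

// Toy slot encoding: each instruction number owns four consecutive slots.
enum Slot { Block = 0, EarlyClobber = 1, Register = 2, Dead = 3 };

unsigned slotIndex(unsigned instr, Slot s) { return instr * 4 + s; }
Slot slotKind(unsigned index) { return static_cast<Slot>(index % 4); }

int main() {
  unsigned regSlot = slotIndex(/*instr=*/7, Register);
  std::printf("instr 7, register slot -> %u (kind %d)\n", regSlot, (int)slotKind(regSlot));
  return 0;
}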
@@ -313,13 +290,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { case SDep::Output: dbgs() << "out "; break; case SDep::Order: dbgs() << "ch "; break; } - dbgs() << "#"; - dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); if (I->isAssignedRegDep()) - dbgs() << " Reg=" << G->TRI->getName(I->getReg()); + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); dbgs() << "\n"; } } @@ -334,8 +310,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { case SDep::Output: dbgs() << "out "; break; case SDep::Order: dbgs() << "ch "; break; } - dbgs() << "#"; - dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); @@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } #ifndef NDEBUG -/// VerifySchedule - Verify that all SUnits were scheduled and that -/// their state is consistent. +/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that +/// their state is consistent. Return the number of scheduled nodes. /// -void ScheduleDAG::VerifySchedule(bool isBottomUp) { +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { bool AnyNotSched = false; unsigned DeadNodes = 0; - unsigned Noops = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { if (!SUnits[i].isScheduled) { if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { @@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { } } } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + return SUnits.size() - DeadNodes; } #endif diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp deleted file mode 100644 index f8b1bc76eb8b..000000000000 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements the Emit routines for the ScheduleDAG class, which creates -// MachineInstrs according to the computed schedule. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-RA-sched" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -void ScheduleDAG::EmitNoop() { - TII->insertNoop(*BB, InsertPos); -} - -void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, - DenseMap &VRBaseMap) { - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->CopyDstRC) { - // Copy to physical register. - DenseMap::iterator VRI = VRBaseMap.find(I->getSUnit()); - assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); - // Find the destination physical register. - unsigned Reg = 0; - for (SUnit::const_succ_iterator II = SU->Succs.begin(), - EE = SU->Succs.end(); II != EE; ++II) { - if (II->isCtrl()) continue; // ignore chain preds - if (II->getReg()) { - Reg = II->getReg(); - break; - } - } - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(VRI->second); - } else { - // Copy from physical register. - assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); - bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - (void)isNew; // Silence compiler warning. 
- assert(isNew && "Node emitted out of order - early"); - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(I->getReg()); - } - break; - } -} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 34b8ab0b47f2..6be1ab7f5b08 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" -#include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -33,25 +34,17 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) + const MachineDominatorTree &mdt, + bool IsPostRAFlag, + LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), - Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), - LoopRegs(MLI, MDT), FirstDbgValue(0) { + InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), + IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), + FirstDbgValue(0) { + assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); -} - -/// Run - perform scheduling. -/// -void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount) { - BB = bb; - Begin = begin; - InsertPosIndex = endcount; - - ScheduleDAG::Run(bb, end); + assert(!(IsPostRA && MRI.getNumVirtRegs()) && + "Virtual registers must be removed prior to PostRA scheduling"); } /// getUnderlyingObjectFromInt - This is the function that does the work of @@ -133,19 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) - if (BB == ML->getLoopLatch()) { - MachineBasicBlock *Header = ML->getHeader(); - for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), - E = Header->livein_end(); I != E; ++I) - LoopLiveInRegs.insert(*I); + if (BB == ML->getLoopLatch()) LoopRegs.VisitLoop(ML); - } } -/// AddSchedBarrierDeps - Add dependencies from instructions in the current +void ScheduleDAGInstrs::finishBlock() { + // Nothing to do. +} + +/// Initialize the map with the number of registers. +void Reg2SUnitsMap::setRegLimit(unsigned Limit) { + PhysRegSet.setUniverse(Limit); + SUnits.resize(Limit); +} + +/// Clear the map without deallocating storage. +void Reg2SUnitsMap::clear() { + for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { + SUnits[*I].clear(); + } + PhysRegSet.clear(); +} + +/// Initialize the DAG and common scheduler state for the current scheduling +/// region. This does not actually create the DAG, only clears it. 
The +/// scheduling driver may call BuildSchedGraph multiple times per scheduling +/// region. +void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + BB = bb; + RegionBegin = begin; + RegionEnd = end; + EndIndex = endcount; + MISUnitMap.clear(); + + // Check to see if the scheduler cares about latencies. + UnitLatencies = forceUnitLatencies(); + + ScheduleDAG::clearDAG(); +} + +/// Close the current scheduling region. Don't clear any state in case the +/// driver wants to refer to the previous scheduling region. +void ScheduleDAGInstrs::exitRegion() { + // Nothing to do. +} + +/// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier by adding /// the exit SU to the register defs and use list. This is because we want to /// make sure instructions which define registers that are either used by @@ -153,11 +185,11 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { /// especially important when the definition latency of the return value(s) /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. -void ScheduleDAGInstrs::AddSchedBarrierDeps() { - MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; +void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && - (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + (ExitMI->isCall() || ExitMI->isBarrier()); if (ExitMI && AllDepKnown) { // If it's a call or a barrier, add dependencies on the defs and uses of // instruction. @@ -167,29 +199,313 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - Uses[Reg].push_back(&ExitSU); + if (TRI->isPhysicalRegister(Reg)) + Uses[Reg].push_back(&ExitSU); + else { + assert(!IsPostRA && "Virtual register encountered after regalloc."); + addVRegUseDeps(&ExitSU, i); + } } } else { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - SmallSet Seen; + assert(Uses.empty() && "Uses in set before adding deps?"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - if (Seen.insert(Reg)) + if (!Uses.contains(Reg)) Uses[Reg].push_back(&ExitSU); } } } -void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { - // We'll be allocating one SUnit for each instruction, plus one for - // the region exit node. +/// MO is an operand of SU's instruction that defines a physical register. Add +/// data dependencies from SU to any uses of the physical register. +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, + const MachineOperand &MO) { + assert(MO.isDef() && "expect physreg def"); + + // Ask the target if address-backscheduling is desirable, and if so how much. 
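Reg2SUnitsMap, introduced a little earlier in this file's hunk, is keyed by a SparseSet sized with setUniverse(), which is what lets clear() drop all entries without deallocating the per-register vectors. The classic sparse/dense two-array set behind that idea, in a minimal standalone form; SparseIndexSet is an illustrative class, not LLVM's SparseSet.

#include <cassert>
#include <vector>

// Minimal sparse set over a fixed universe [0, Universe): O(1) insert,
// membership test, and clear, without ever reading uninitialized slots.
class SparseIndexSet {
  std::vector<unsigned> Sparse; // key -> candidate position in Dense (may be stale)
  std::vector<unsigned> Dense;  // keys currently in the set
public:
  void setUniverse(unsigned Universe) { Sparse.assign(Universe, 0); Dense.clear(); }
  bool contains(unsigned Key) const {
    unsigned i = Sparse[Key];
    return i < Dense.size() && Dense[i] == Key;
  }
  void insert(unsigned Key) {
    if (contains(Key)) return;
    Sparse[Key] = static_cast<unsigned>(Dense.size());
    Dense.push_back(Key);
  }
  void clear() { Dense.clear(); } // O(1): stale Sparse entries are harmless
  const std::vector<unsigned> &keys() const { return Dense; }
};

int main() {
  SparseIndexSet S;
  S.setUniverse(256);
  S.insert(17); S.insert(42);
  assert(S.contains(17) && !S.contains(3));
  S.clear();
  return S.contains(42) ? 1 : 0;
}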
+ const TargetSubtargetInfo &ST = TM.getSubtarget(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + unsigned DataLatency = SU->Latency; + + for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + if (!Uses.contains(*Alias)) + continue; + std::vector &UseList = Uses[*Alias]; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU == SU) + continue; + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about it. + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { + MachineInstr *UseMI = UseSU->getInstr(); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); + assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); + if (RegUseIndex >= 0 && + (UseMI->mayLoad() || UseMI->mayStore()) && + (unsigned)RegUseIndex < UseMCID.getNumOperands() && + UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; + } + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + if (!UnitLatencies) { + computeOperandLatency(SU, UseSU, const_cast(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); + } + UseSU->addPred(dep); + } + } +} + +/// addPhysRegDeps - Add register dependencies (data, anti, and output) from +/// this SUnit to following instructions in the same scheduling region that +/// depend the physical register referenced at OperIdx. +void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. + SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + if (!Defs.contains(*Alias)) + continue; + std::vector &DefList = Defs[*Alias]; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(*Alias))) { + if (Kind == SDep::Anti) + DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias)); + else { + unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias)); + } + } + } + } + + if (!MO.isDef()) { + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's uses. + // Push this SUnit on the use list. + Uses[MO.getReg()].push_back(SU); + } + else { + addPhysRegDataDeps(SU, MO); + + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's defs. 
+ std::vector &DefList = Defs[MO.getReg()]; + + // If a def is going to wrap back around to the top of the loop, + // backschedule it. + if (!UnitLatencies && DefList.empty()) { + LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg()); + if (I != LoopRegs.Deps.end()) { + const MachineOperand *UseMO = I->second.first; + unsigned Count = I->second.second; + const MachineInstr *UseMI = UseMO->getParent(); + unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); + const MCInstrDesc &UseMCID = UseMI->getDesc(); + const TargetSubtargetInfo &ST = + TM.getSubtarget(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + // TODO: If we knew the total depth of the region here, we could + // handle the case where the whole loop is inside the region but + // is large enough that the isScheduleHigh trick isn't needed. + if (UseMOIdx < UseMCID.getNumOperands()) { + // Currently, we only support scheduling regions consisting of + // single basic blocks. Check to see if the instruction is in + // the same region by checking to see if it has the same parent. + if (UseMI->getParent() != MI->getParent()) { + unsigned Latency = SU->Latency; + if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) + Latency += SpecialAddressLatency; + // This is a wild guess as to the portion of the latency which + // will be overlapped by work done outside the current + // scheduling region. + Latency -= std::min(Latency, Count); + // Add the artificial edge. + ExitSU.addPred(SDep(SU, SDep::Order, Latency, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } else if (SpecialAddressLatency > 0 && + UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { + // The entire loop body is within the current scheduling region + // and the latency of this operation is assumed to be greater + // than the latency of the loop. + // TODO: Recursively mark data-edge predecessors as + // isScheduleHigh too. + SU->isScheduleHigh = true; + } + } + LoopRegs.Deps.erase(I); + } + } + + // clear this register's use list + if (Uses.contains(MO.getReg())) + Uses[MO.getReg()].clear(); + + if (!MO.isDead()) + DefList.clear(); + + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + if (SU->isCall) { + while (!DefList.empty() && DefList.back()->isCall) + DefList.pop_back(); + } + // Defs are pushed in the order they are visited and never reordered. + DefList.push_back(SU); + } +} + +/// addVRegDefDeps - Add register output and data dependencies from this SUnit +/// to instructions that occur later in the same scheduling region if they read +/// from or write to the virtual register defined at OperIdx. +/// +/// TODO: Hoist loop induction variable increments. This has to be +/// reevaluated. Generally, IV scheduling should be done before coalescing. +void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // SSA defs do not have output/anti dependencies. + // The current operand is a def, so we have at least one. + if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) + return; + + // Add output dependence to the next nearest def of this vreg. 
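The single-definition test just above (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) checks "exactly one def" by looking one step past the first definition instead of counting the whole def list. A generic form of that test; hasSingleElement is an illustrative helper, not part of the patch.

#include <cassert>
#include <iterator>
#include <list>

// True when the range [first, last) holds exactly one element; this avoids
// walking the whole range the way std::distance(first, last) == 1 would.
template <class It>
bool hasSingleElement(It first, It last) {
  return first != last && std::next(first) == last;
}

int main() {
  std::list<int> one{7}, two{7, 8};
  assert(hasSingleElement(one.begin(), one.end()));
  assert(!hasSingleElement(two.begin(), two.end()));
  return 0;
}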
+ // + // Unless this definition is dead, the output dependence should be + // transitively redundant with antidependencies from this definition's + // uses. We're conservative for now until we have a way to guarantee the uses + // are not eliminated sometime during scheduling. The output dependence edge + // is also useful if output latency exceeds def-use latency. + VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + if (DefI == VRegDefs.end()) + VRegDefs.insert(VReg2SUnit(Reg, SU)); + else { + SUnit *DefSU = DefI->SU; + if (DefSU != SU && DefSU != &ExitSU) { + unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg)); + } + DefI->SU = SU; + } +} + +/// addVRegUseDeps - Add a register data dependency if the instruction that +/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a +/// register antidependency from this SUnit to instructions that occur later in +/// the same scheduling region if they write the virtual register. +/// +/// TODO: Handle ExitSU "uses" properly. +void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + unsigned Reg = MI->getOperand(OperIdx).getReg(); + + // Lookup this operand's reaching definition. + assert(LIS && "vreg dependencies requires LiveIntervals"); + SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval *LI = &LIS->getInterval(Reg); + VNInfo *VNI = LI->getVNInfoBefore(UseIdx); + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); + // Phis and other noninstructions (after coalescing) have a NULL Def. + if (Def) { + SUnit *DefSU = getSUnit(Def); + if (DefSU) { + // The reaching Def lives within this scheduling region. + // Create a data dependence. + // + // TODO: Handle "special" address latencies cleanly. + const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); + if (!UnitLatencies) { + // Adjust the dependence latency using operand def/use information, then + // allow the target to perform its own adjustments. + computeOperandLatency(DefSU, SU, const_cast(dep)); + const TargetSubtargetInfo &ST = TM.getSubtarget(); + ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); + } + SU->addPred(dep); + } + } + + // Add antidependence to the following def of the vreg it uses. + VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + if (DefI != VRegDefs.end() && DefI->SU != SU) + DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg)); +} + +/// Create an SUnit for each real instruction, numbered in top-down toplological +/// order. The instruction order A < B, implies that no edge exists from B to A. +/// +/// Map each real instruction to its SUnit. +/// +/// After initSUnits, the SUnits vector cannot be resized and the scheduler may +/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs +/// instead of pointers. +/// +/// MachineScheduler relies on initSUnits numbering the nodes by their order in +/// the original instruction list. +void ScheduleDAGInstrs::initSUnits() { + // We'll be allocating one SUnit for each real instruction in the region, + // which is contained within a basic block. 
SUnits.reserve(BB->size()); + for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SUnit *SU = newSUnit(MI); + MISUnitMap[MI] = SU; + + SU->isCall = MI->isCall(); + SU->isCommutable = MI->isCommutable(); + + // Assign the Latency field of SU using target-provided information. + if (UnitLatencies) + SU->Latency = 1; + else + computeLatency(SU); + } +} + +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { + // Create an SUnit for each real instruction. + initSUnits(); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -203,29 +519,29 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map AliasMemDefs, NonAliasMemDefs; std::map > AliasMemUses, NonAliasMemUses; - // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); - - // Ask the target if address-backscheduling is desirable, and if so how much. - const TargetSubtargetInfo &ST = TM.getSubtarget(); - unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); - // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); FirstDbgValue = NULL; + assert(Defs.empty() && Uses.empty() && + "Only BuildGraph should update Defs/Uses"); + Defs.setRegLimit(TRI->getNumRegs()); + Uses.setRegLimit(TRI->getNumRegs()); + + assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + // FIXME: Allow SparseSet to reserve space for the creation of virtual + // registers during scheduling. Don't artificially inflate the Universe + // because we want to assert that vregs are not created during DAG building. + VRegDefs.setUniverse(MRI.getNumVirtRegs()); + // Model data dependencies between instructions being scheduled and the // ExitSU. - AddSchedBarrierDeps(); - - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs"); - } + addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. MachineInstr *PrevMI = NULL; - for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); if (MI && PrevMI) { @@ -238,19 +554,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - assert(!MCID.isTerminator() && !MI->isLabel() && + assert(!MI->isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); - // Create the SUnit for this MI. - SUnit *SU = NewSUnit(MI); - SU->isCall = MCID.isCall(); - SU->isCommutable = MCID.isCommutable(); - // Assign the Latency field of SU using target-provided information. - if (UnitLatencies) - SU->Latency = 1; - else - ComputeLatency(SU); + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); // Add register-based dependencies (data, anti, and output). for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -259,152 +567,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); - - std::vector &UseList = Uses[Reg]; - // Defs are push in the order they are visited and never reordered. - std::vector &DefList = Defs[Reg]; - // Optionally add output and anti dependencies. 
For anti - // dependencies we use a latency of 0 because for a multi-issue - // target we want to allow the defining instruction to issue - // in the same cycle as the using instruction. - // TODO: Using a latency of 1 here for output dependencies assumes - // there's no cost for reusing registers. - SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; - if (DefSU == &ExitSU) - continue; - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector &MemDefList = Defs[*Alias]; - for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) { - SUnit *DefSU = MemDefList[i]; - if (DefSU == &ExitSU) - continue; - if (DefSU != SU && - (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(*Alias))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); - } - } - - if (MO.isDef()) { - // Add any data dependencies. - unsigned DataLatency = SU->Latency; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Do this for register aliases too. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies && - UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); - assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if (RegUseIndex >= 0 && - (UseMCID.mayLoad() || UseMCID.mayStore()) && - (unsigned)RegUseIndex < UseMCID.getNumOperands() && - UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. - const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); - } - UseSU->addPred(dep); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - std::vector &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; - if (UseSU == SU) - continue; - const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); - } - UseSU->addPred(dep); - } - } - - // If a def is going to wrap back around to the top of the loop, - // backschedule it. 
- if (!UnitLatencies && DefList.empty()) { - LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); - if (I != LoopRegs.Deps.end()) { - const MachineOperand *UseMO = I->second.first; - unsigned Count = I->second.second; - const MachineInstr *UseMI = UseMO->getParent(); - unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); - const MCInstrDesc &UseMCID = UseMI->getDesc(); - // TODO: If we knew the total depth of the region here, we could - // handle the case where the whole loop is inside the region but - // is large enough that the isScheduleHigh trick isn't needed. - if (UseMOIdx < UseMCID.getNumOperands()) { - // Currently, we only support scheduling regions consisting of - // single basic blocks. Check to see if the instruction is in - // the same region by checking to see if it has the same parent. - if (UseMI->getParent() != MI->getParent()) { - unsigned Latency = SU->Latency; - if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) - Latency += SpecialAddressLatency; - // This is a wild guess as to the portion of the latency which - // will be overlapped by work done outside the current - // scheduling region. - Latency -= std::min(Latency, Count); - // Add the artificial edge. - ExitSU.addPred(SDep(SU, SDep::Order, Latency, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - } else if (SpecialAddressLatency > 0 && - UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { - // The entire loop body is within the current scheduling region - // and the latency of this operation is assumed to be greater - // than the latency of the loop. - // TODO: Recursively mark data-edge predecessors as - // isScheduleHigh too. - SU->isScheduleHigh = true; - } - } - LoopRegs.Deps.erase(I); - } - } - - UseList.clear(); - if (!MO.isDead()) - DefList.clear(); - - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) - DefList.pop_back(); - } - DefList.push_back(SU); - } else { - UseList.push_back(SU); + if (TRI->isPhysicalRegister(Reg)) + addPhysRegDeps(SU, j); + else { + assert(!IsPostRA && "Virtual register encountered!"); + if (MO.isDef()) + addVRegDefDeps(SU, j); + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(SU, j); } } @@ -419,9 +589,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (MCID.isCall() || MI->hasUnmodeledSideEffects() || + if (MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && - (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) { + (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. 
for (std::map::iterator I = @@ -460,7 +630,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (MCID.mayStore()) { + } else if (MI->mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -516,7 +686,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (MCID.mayLoad()) { + } else if (MI->mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { @@ -558,32 +728,27 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { if (PrevMI) FirstDbgValue = PrevMI; - for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { - Defs[i].clear(); - Uses[i].clear(); - } + Defs.clear(); + Uses.clear(); + VRegDefs.clear(); PendingLoads.clear(); } -void ScheduleDAGInstrs::FinishBlock() { - // Nothing to do. -} - -void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { +void ScheduleDAGInstrs::computeLatency(SUnit *SU) { // Compute the latency for the node. if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; // Simplistic target-independent heuristic: assume that loads take // extra time. - if (SU->getInstr()->getDesc().mayLoad()) + if (SU->getInstr()->mayLoad()) SU->Latency += 2; } else { SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; @@ -608,7 +773,9 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, // %Q1 = VMULv8i16 %Q1, %Q3, ... // What we want is to compute latency between def of %D6/%D7 and use of // %Q3 instead. - DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (DefMI->getOperand(Op2).isReg()) + DefIdx = Op2; } MachineInstr *UseMI = Use->getInstr(); // For all uses of the register, calculate the maxmimum latency @@ -656,43 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { return oss.str(); } -// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { - // For MachineInstr-based scheduling, we're rescheduling the instructions in - // the block, so start by removing them from the block. - while (Begin != InsertPos) { - MachineBasicBlock::iterator I = Begin; - ++Begin; - BB->remove(I); - } - - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) - BB->insert(InsertPos, FirstDbgValue); - - // Then re-insert them according to the given schedule. - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - BB->insert(InsertPos, SU->getInstr()); - else - // Null SUnit* is a noop. - EmitNoop(); - } - - // Update the Begin iterator, as the first instruction in the block - // may have been scheduled later. - if (!Sequence.empty()) - Begin = Sequence[0]->getInstr(); - - // Reinsert any remaining debug_values. 
- for (std::vector >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineInstr *OrigPrivMI = P.second; - BB->insertAfter(OrigPrivMI, DbgValue); - } - DbgValues.clear(); - FirstDbgValue = NULL; - return BB; +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. +std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); } diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h deleted file mode 100644 index 666bdf548c71..000000000000 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h +++ /dev/null @@ -1,212 +0,0 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ScheduleDAGInstrs class, which implements -// scheduling for a MachineInstr-based dependency graph. -// -//===----------------------------------------------------------------------===// - -#ifndef SCHEDULEDAGINSTRS_H -#define SCHEDULEDAGINSTRS_H - -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include - -namespace llvm { - class MachineLoopInfo; - class MachineDominatorTree; - - /// LoopDependencies - This class analyzes loop-oriented register - /// dependencies, which are used to guide scheduling decisions. - /// For example, loop induction variable increments should be - /// scheduled as soon as possible after the variable's last use. - /// - class LLVM_LIBRARY_VISIBILITY LoopDependencies { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - - public: - typedef std::map > - LoopDeps; - LoopDeps Deps; - - LoopDependencies(const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) : - MLI(mli), MDT(mdt) {} - - /// VisitLoop - Clear out any previous state and analyze the given loop. 
- /// - void VisitLoop(const MachineLoop *Loop) { - assert(Deps.empty() && "stale loop dependencies"); - - MachineBasicBlock *Header = Loop->getHeader(); - SmallSet LoopLiveIns; - for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), - LE = Header->livein_end(); LI != LE; ++LI) - LoopLiveIns.insert(*LI); - - const MachineDomTreeNode *Node = MDT.getNode(Header); - const MachineBasicBlock *MBB = Node->getBlock(); - assert(Loop->contains(MBB) && - "Loop does not contain header!"); - VisitRegion(Node, MBB, Loop, LoopLiveIns); - } - - private: - void VisitRegion(const MachineDomTreeNode *Node, - const MachineBasicBlock *MBB, - const MachineLoop *Loop, - const SmallSet &LoopLiveIns) { - unsigned Count = 0; - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (LoopLiveIns.count(MOReg)) - Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); - } - ++Count; // Not every iteration due to dbg_value above. - } - - const std::vector &Children = Node->getChildren(); - for (std::vector::const_iterator I = - Children.begin(), E = Children.end(); I != E; ++I) { - const MachineDomTreeNode *ChildNode = *I; - MachineBasicBlock *ChildBlock = ChildNode->getBlock(); - if (Loop->contains(ChildBlock)) - VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); - } - } - }; - - /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of - /// MachineInstrs. - class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - const MachineFrameInfo *MFI; - const InstrItineraryData *InstrItins; - - /// Defs, Uses - Remember where defs and uses of each physical register - /// are as we iterate upward through the instructions. This is allocated - /// here instead of inside BuildSchedGraph to avoid the need for it to be - /// initialized and destructed for each block. - std::vector > Defs; - std::vector > Uses; - - /// PendingLoads - Remember where unknown loads are after the most recent - /// unknown store, as we iterate. As with Defs and Uses, this is here - /// to minimize construction/destruction. - std::vector PendingLoads; - - /// LoopRegs - Track which registers are used for loop-carried dependencies. - /// - LoopDependencies LoopRegs; - - /// LoopLiveInRegs - Track which regs are live into a loop, to help guide - /// back-edge-aware scheduling. - /// - SmallSet LoopLiveInRegs; - - protected: - - /// DbgValues - Remember instruction that preceeds DBG_VALUE. - typedef std::vector > - DbgValueVector; - DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; - - public: - MachineBasicBlock::iterator Begin; // The beginning of the range to - // be scheduled. The range extends - // to InsertPos. - unsigned InsertPosIndex; // The index in BB of InsertPos. - - explicit ScheduleDAGInstrs(MachineFunction &mf, - const MachineLoopInfo &mli, - const MachineDominatorTree &mdt); - - virtual ~ScheduleDAGInstrs() {} - - /// NewSUnit - Creates a new SUnit and return a ptr to it. - /// - SUnit *NewSUnit(MachineInstr *MI) { -#ifndef NDEBUG - const SUnit *Addr = SUnits.empty() ? 
0 : &SUnits[0]; -#endif - SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && - "SUnits std::vector reallocated on the fly!"); - SUnits.back().OrigNode = &SUnits.back(); - return &SUnits.back(); - } - - /// Run - perform scheduling. - /// - void Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endindex); - - /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are - /// input. - virtual void BuildSchedGraph(AliasAnalysis *AA); - - /// AddSchedBarrierDeps - Add dependencies from instructions in the current - /// list of instructions being scheduled to scheduling barrier. We want to - /// make sure instructions which define registers that are either used by - /// the terminator or are live-out are properly scheduled. This is - /// especially important when the definition latency of the return value(s) - /// are too high to be hidden by the branch or when the liveout registers - /// used by instructions in the fallthrough block. - void AddSchedBarrierDeps(); - - /// ComputeLatency - Compute node latency. - /// - virtual void ComputeLatency(SUnit *SU); - - /// ComputeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const; - - virtual MachineBasicBlock *EmitSchedule(); - - /// StartBlock - Prepare to perform scheduling in the given block. - /// - virtual void StartBlock(MachineBasicBlock *BB); - - /// Schedule - Order nodes according to selected style, filling - /// in the Sequence member. - /// - virtual void Schedule() = 0; - - /// FinishBlock - Clean up after scheduling in the given block. - /// - virtual void FinishBlock(); - - virtual void dumpNode(const SUnit *SU) const; - - virtual std::string getGraphNodeLabel(const SUnit *SU) const; - }; -} - -#endif diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 4b55a2284f85..38feee95a58e 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" #include using namespace llvm; @@ -42,12 +41,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. static std::string getEdgeAttributes(const SUnit *Node, @@ -59,7 +58,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, @@ -82,18 +81,17 @@ std::string DOTGraphTraits::getNodeLabel(const SUnit *SU, /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG /// rendered using 'dot'. /// -void ScheduleDAG::viewGraph() { -// This code is only for debugging! +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! #ifndef NDEBUG - if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + - ":" + BB->getBasicBlock()->getNameStr()); - else - ViewGraph(this, "dag." 
+ MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); + ViewGraph(this, Name, false, Title); #else errs() << "ScheduleDAG::viewGraph is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b80c01ed58b9..3d22035974da 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -140,8 +140,6 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[StageCycle]; @@ -194,8 +192,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones freeUnits &= ~ReservedScoreboard[cycle + i]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7b878688df63..d1b998f8d840 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22,7 +22,6 @@ #include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -64,7 +63,24 @@ namespace { bool LegalTypes; // Worklist of all of the nodes that need to be simplified. - std::vector WorkList; + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. + // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet WorkListContents; + SmallVector WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -84,18 +100,17 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure it's instance is at the - /// the back (next to be processed.) + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) void AddToWorkList(SDNode *N) { - removeFromWorkList(N); - WorkList.push_back(N); + WorkListContents.insert(N); + WorkListOrder.push_back(N); } /// removeFromWorkList - remove all instances of N from the worklist. 
/// void removeFromWorkList(SDNode *N) { - WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), - WorkList.end()); + WorkListContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -159,7 +174,9 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); @@ -181,7 +198,9 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); @@ -279,7 +298,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, + const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. if (Op.getValueType() == MVT::ppcf128) @@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, return LegalOperations ? 0 : 1; case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
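As the comment above notes, -(A-B) cannot be rewritten as B-A when signed zeros are honored. A tiny self-contained illustration of the corner case, using ordinary doubles rather than LLVM types:

    #include <cmath>
    #include <cstdio>

    int main() {
        double A = 1.0, B = 1.0;
        double Neg  = -(A - B);   // -(1.0 - 1.0) == -0.0
        double Swap = B - A;      //  1.0 - 1.0  == +0.0
        // The two results compare equal, but they are distinguishable values:
        // the sign bit differs, which is exactly what "honor signed zeros"
        // protects, so the transform is gated on UnsafeFPMath.
        std::printf("-(A-B): %s0.0, B-A: %s0.0\n",
                    std::signbit(Neg)  ? "-" : "+",
                    std::signbit(Swap) ? "-" : "+");
    }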
- if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; case ISD::FMUL: case ISD::FDIV: - if (HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, + Depth + 1); } } @@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) @@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!HonorSignDependentRoundingFPMath()); + assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; - LegalOperations = Level >= NoIllegalOperations; - LegalTypes = Level >= NoIllegalTypes; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - WorkList.reserve(DAG.allnodes_size()); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - WorkList.push_back(I); + AddToWorkList(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // done. Set it to null to avoid confusion. 
DAG.setRoot(SDValue()); - // while the worklist isn't empty, inspect the node on the end of it and + // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkList.empty()) { - SDNode *N = WorkList.back(); - WorkList.pop_back(); + while (!WorkListContents.empty()) { + SDNode *N; + // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // In order to avoid a linear scan, we use a set (O(log N)) to hold what the + // worklist *should* contain, and check the node we want to visit is should + // actually be visited. + do { + N = WorkListOrder.pop_back_val(); + } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a @@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); @@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); } } @@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. - if (N->hasNUsesOfValue(0, 1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 
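The reworked DAGCombiner worklist described above pairs a membership set with an order stack and filters stale entries when popping. A standalone sketch of the same scheme with standard containers follows; the Node type and WorkList wrapper are illustrative, while the real code uses SmallPtrSet and SmallVector.

    #include <cstdio>
    #include <unordered_set>
    #include <vector>

    struct Node { int Id; };

    struct WorkList {
        std::unordered_set<Node*> Contents;  // what is currently on the worklist
        std::vector<Node*> Order;            // visit order; may hold stale duplicates

        // Adding re-pushes the node so it is next to be processed; the set
        // keeps membership unique even though Order may now hold duplicates.
        void add(Node *N) { Contents.insert(N); Order.push_back(N); }
        void remove(Node *N) { Contents.erase(N); }
        bool empty() const { return Contents.empty(); }

        // Pop entries off the order stack until one is still a live member;
        // erase() returning non-zero means the entry was not stale.
        Node *pop() {
            Node *N;
            do {
                N = Order.back();
                Order.pop_back();
            } while (!Contents.erase(N));
            return N;
        }
    };

    int main() {
        Node A{0}, B{1};
        WorkList WL;
        WL.add(&A); WL.add(&B); WL.add(&A);  // A re-added: duplicate in Order only
        WL.remove(&B);                       // B becomes a stale Order entry
        while (!WL.empty())
            std::printf("visit %d\n", WL.pop()->Id);  // visits A exactly once
        return 0;
    }

The design trade-off is the one the comment states: insertion and removal stay cheap, at the cost of tolerating dead entries in the order vector until they are popped and discarded.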
// If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); return SDValue(); } @@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, VT), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getSExtValue() == 1LL) + if (N1C && N1C->getAPIntValue() == 1LL) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) @@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && - (isPowerOf2_64(N1C->getSExtValue()) || - isPowerOf2_64(-N1C->getSExtValue()))) { + if (N1C && !N1C->isNullValue() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) return SDValue(); - int64_t pow2 = N1C->getSExtValue(); - int64_t abs2 = pow2 > 0 ? 
pow2 : -pow2; - unsigned lg2 = Log2_64(abs2); + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, @@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. - if (pow2 > 0) + if (N1C->getAPIntValue().isNonNegative()) return SRA; AddToWorkList(SRA.getNode()); @@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && - !TLI.isIntDivCheap()) { + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ORNode, N0.getOperand(1)); } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) + && Level == AfterLegalizeVectorOps) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + // If both incoming values are integers, and the original types are the same. + if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast(N0); + ShuffleVectorSDNode *SVN1 = cast(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. 
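The shuffle combine above relies on a bitwise operation commuting with a common per-lane permutation, so a single swizzle after the operation suffices. A scalar-array sketch of that equivalence (plain C++, not SelectionDAG nodes):

    #include <array>
    #include <cassert>
    #include <cstdint>

    constexpr int N = 4;
    using Vec = std::array<uint32_t, N>;

    // Apply one permutation mask to a single source vector (a "swizzle").
    Vec shuffle(const Vec &V, const std::array<int, N> &Mask) {
        Vec R{};
        for (int i = 0; i < N; ++i)
            R[i] = V[Mask[i]];
        return R;
    }

    Vec bxor(const Vec &A, const Vec &B) {
        Vec R{};
        for (int i = 0; i < N; ++i)
            R[i] = A[i] ^ B[i];
        return R;
    }

    int main() {
        Vec A = {1, 2, 3, 4}, B = {5, 6, 7, 8};
        std::array<int, N> Mask = {3, 1, 2, 0};
        // Lane i of xor(shuffle(A), shuffle(B)) is A[Mask[i]] ^ B[Mask[i]],
        // which is lane Mask[i] of xor(A, B), i.e. shuffle(xor(A, B)).
        assert(bxor(shuffle(A, Mask), shuffle(B, Mask)) ==
               shuffle(bxor(A, B), Mask));
        return 0;
    }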
+ bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. + EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. 
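The splat handling above collapses a vector splat constant into a single element-wide mask by ANDing every lane together. A plain-integer sketch of that lane fold, using the repeating 0x00FFFFFF pattern mentioned in the comment (hand-rolled shifts rather than APInt):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // A v4i8 "splat" built from the i32 pattern 0x00FFFFFF: its byte
        // lanes are FF, FF, FF, 00 repeating, so no single per-element mask
        // describes every lane until the lanes are folded together.
        const uint32_t SplatValue = 0x00FFFFFFu;
        const unsigned BitWidth = 8;                   // element width in bits
        const unsigned NumLanes = 32 / BitWidth;

        uint32_t Constant = 0xFFu;                     // all ones at element width
        for (unsigned i = 0; i < NumLanes; ++i)
            Constant &= (SplatValue >> (i * BitWidth)) & 0xFFu;

        // Result is 0x00: the AND is not a no-op for every lane, so the
        // "constant is all ones, drop the AND" fold must not fire here.
        std::printf("per-lane mask: 0x%02X\n", Constant);
        return 0;
    }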
+ SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast(CC0)->get(); @@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) - if (N1C && N0.getOpcode() == ISD::SRL && + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { @@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero & Mask; + APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. @@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (isa(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isa(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Only do this before legalize for now. 
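The visitSRL fold above reasons about srl (ctlz x), log2(bitwidth): the result is 1 precisely when x is zero and 0 otherwise. A portable demonstration with a hand-written clz that follows the CTLZ convention clz(0) == 32 (nothing here is LLVM code):

    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    // Count leading zeros with the DAG CTLZ convention: clz32(0) == 32.
    unsigned clz32(uint32_t x) {
        unsigned n = 0;
        while (n < 32 && !(x & 0x80000000u)) {
            x <<= 1;
            ++n;
        }
        return n;
    }

    int main() {
        // (ctlz x) >> 5 is 1 exactly when x == 0 (clz == 32) and 0 otherwise,
        // which is why known-one input bits let the combiner fold it to 0 and
        // all-known-zero input bits let it fold to 1.
        for (uint32_t x : {0u, 1u, 0x80000000u, 12345u})
            std::printf("x=%u -> %u\n", x, clz32(x) >> 5);
        return 0;
    }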
if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); @@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } @@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. +static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast(Op0); + ConstantSDNode *COp1 = dyn_cast(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. 
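The zext(truncate x) fold above is legal exactly when every bit the truncation would drop is already known to be zero. A small mask-level sketch of that check; KnownZero here is a hand-picked mask standing in for a ComputeMaskedBits result:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // x is a 64-bit value whose upper 48 bits are known to be zero,
        // e.g. because it came from a 16-bit zero-extending load.
        const uint64_t KnownZero = ~0xFFFFull;   // bits guaranteed to be 0 in x
        const uint64_t x = 0x1234;               // consistent with KnownZero

        // Model a truncate to i32 followed by a zero-extend back to i64.
        const uint64_t TruncatedBits = ~0xFFFFFFFFull;  // bits dropped by the trunc
        uint64_t RoundTrip = (uint64_t)(uint32_t)x;

        // If the dropped bits are all known zero, the trunc/zext pair is a
        // no-op and the combiner can use x (or a single extend/truncate of
        // it) directly.
        if ((TruncatedBits & KnownZero) == TruncatedBits)
            std::printf("fold ok: %d\n", RoundTrip == x);  // prints 1
        return 0;
    }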
+ SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? + APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; + case ISD::Constant: { + const ConstantSDNode *CV = cast(V.getNode()); + assert(CV != 0 && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) { + return DAG.getConstant(NewVal, V.getValueType()); + } + break; + } case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. @@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign); else Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), @@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa(EltNo) && isTypeLegal(NVT)) { + int Elt = cast(EltNo)->getZExtValue(); + + int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, Align); + false, false, false, Align); } return SDValue(); @@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - OrigAlign); + LN0->isInvariant(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), @@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); @@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); // fold (fadd A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && - N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa(N0.getOperand(1))) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); @@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return 
N0; // fold (fsub 0, B) -> -B - if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations)) + if (DAG.getTarget().Options.UnsafeFPMath && + N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // If 'unsafe math' is enabled, fold + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N10, DAG, LegalOperations); + } + } + return SDValue(); } @@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); // fold (fmul A, 0) -> 0 - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N1; // fold (fmul A, 0) -> 0, vector edition. - if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + if (DAG.getTarget().Options.UnsafeFPMath && + ISD::isBuildVectorAllZeros(N1.getNode())) return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) @@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
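The hunks above replace the old global UnsafeFPMath flag with DAG.getTarget().Options.UnsafeFPMath and add the fold (fsub x, (fadd x, y)) -> (fneg y). These rewrites stay behind the unsafe-math guard because they are not IEEE-754 identities; a minimal standalone sketch (plain C++, no LLVM APIs) of why:

// why_unsafe_fpmath.cpp -- illustration only, not combiner code.
#include <cstdio>

int main() {
    // The combiner rewrites  x - (x + y)  to  -y, but with rounding the two
    // expressions are not equivalent, which is why the fold is gated behind
    // the unsafe-FP-math option.
    double x = 1e16;                 // 1e16 + 1.0 rounds back to 1e16 in double
    double y = 1.0;
    double unfolded = x - (x + y);   // 0.0
    double folded   = -y;            // -1.0
    std::printf("unfolded = %g, folded = %g\n", unfolded, folded);

    // Likewise (fadd A, 0.0) -> A changes the sign of A == -0.0, so that
    // fold is guarded as well.
    return 0;
}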
if (LHSNeg == 2 || RHSNeg == 2) @@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, @@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
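The new fdiv fold above rewrites a division by a constant into a multiplication by its reciprocal, but only under UnsafeFPMath and only when APFloat reports the reciprocal as exact or merely inexact and the target accepts it as an FP immediate. A standalone sketch (ordinary doubles, no APFloat) of the numerical reasoning behind that guard:

// fdiv_to_fmul.cpp -- illustration only, not the DAGCombiner transform.
#include <cstdio>

int main() {
    double x = 1.0 / 3.0;

    // 1.0/8.0 is a power of two, hence exactly representable: x/8.0 and
    // x*(1.0/8.0) agree bit-for-bit for every finite x.
    double exact_div = x / 8.0;
    double exact_mul = x * (1.0 / 8.0);

    // 1.0/3.0 must round, so the rewritten form can differ in the last ulp;
    // that loss of precision is what the UnsafeFPMath option opts into.
    double rough_div = x / 3.0;
    double rough_mul = x * (1.0 / 3.0);

    std::printf("pow2 divisor: %d\n", exact_div == exact_mul);  // 1
    std::printf("divisor 3.0:  %d\n", rough_div == rough_mul);  // may be 0
    return 0;
}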
if (LHSNeg == 2 || RHSNeg == 2) @@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations)) + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && + if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + /// CombineToPreIndexedLoadStore - Try turning a load / store into a /// pre-indexed load / store when the base pointer is an add or subtract /// and it has other uses besides the load / store. After the @@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// the add / subtract in and all of its other uses are redirected to the /// new load / store. 
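canFoldInAddressingMode, added above, is what makes the pre-/post-indexed combines profitability-aware: if the other users of the pointer could simply fold the ADD/SUB into their own [base + imm] or [base + index] addressing mode (per TLI.isLegalAddressingMode), the indexed transformation is skipped. A standalone sketch of that decision; the AddrMode struct and the legality hook below are simplified stand-ins for an invented example target, not the real TargetLowering interface:

// addrmode_fold.cpp -- illustrative sketch only.
#include <cstdint>
#include <cstdlib>

struct AddrMode {
    int64_t BaseOffs = 0;   // [reg +/- imm]
    int     Scale    = 0;   // [reg + reg] when Scale == 1
};

// Hypothetical target: reg+reg folds, and immediates fold if |imm| <= 4095.
static bool isLegalAddressingMode(const AddrMode &AM) {
    if (AM.Scale == 1 && AM.BaseOffs == 0)
        return true;
    return AM.BaseOffs >= -4095 && AM.BaseOffs <= 4095;
}

// Mirrors the shape of canFoldInAddressingMode: an ADD/SUB feeding a load or
// store is "free" when the target can fold it into the memory operand.
static bool canFoldAsAddress(bool isAdd, bool hasConstOffset, int64_t offset) {
    AddrMode AM;
    if (hasConstOffset)
        AM.BaseOffs = isAdd ? offset : -offset;
    else
        AM.Scale = 1;                 // [reg +/- reg]
    return isLegalAddressingMode(AM);
}

int main() {
    bool small = canFoldAsAddress(true, true, 16);     // folds; keep plain add
    bool large = canFoldAsAddress(true, true, 65536);  // does not; pre-index
    return (small && !large) ? EXIT_SUCCESS : EXIT_FAILURE;
}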
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; - if (!((Use->getOpcode() == ISD::LOAD && - cast(Use)->getBasePtr() == Ptr) || - (Use->getOpcode() == ISD::STORE && - cast(Use)->getBasePtr() == Ptr))) + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } @@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { /// load / store effectively and all of its uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { continue; // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr. + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mmode). // 2) Op must be independent of N, i.e. Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. @@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!((UseUse->getOpcode() == ISD::LOAD && - cast(UseUse)->getBasePtr().getNode() == Use) || - (UseUse->getOpcode() == ISD::STORE && - cast(UseUse)->getBasePtr().getNode() == Use))) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. - if (N->hasNUsesOfValue(0, 0)) { + if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc @@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. 
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); @@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), - NewAlign); + LD->isInvariant(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), @@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - false, false, LDAlign); + false, false, false, LDAlign); SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), NewLD, ST->getBasePtr(), @@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. 
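The new EXTRACT_VECTOR_ELT-of-VECTOR_SHUFFLE fold above avoids materializing the shuffle when only one lane is read: the constant lane number is remapped through the shuffle mask, the matching source vector is chosen (a -1 mask entry yields undef), and a narrower extract is emitted. A standalone sketch of the remapping, with plain arrays standing in for the DAG nodes:

// extract_through_shuffle.cpp -- illustration only; arrays model the vectors.
#include <array>
#include <cassert>
#include <cstdio>

int main() {
    std::array<int, 4> v0 = {10, 11, 12, 13};
    std::array<int, 4> v1 = {20, 21, 22, 23};
    // Entries 0..3 select from v0, 4..7 from v1, -1 means undef.
    std::array<int, 4> mask = {6, 0, -1, 3};

    int elt = 1;              // extract lane 1 of shuffle(v0, v1, mask)
    int origElt = mask[elt];  // remap through the mask
    assert(origElt != -1 && "extracting an undef lane gives undef");

    // Pick the source half and rebase the index, as the combiner does before
    // it emits the narrower EXTRACT_VECTOR_ELT.
    const std::array<int, 4> &src = (origElt < 4) ? v0 : v1;
    if (origElt >= 4)
        origElt -= 4;

    std::printf("%d\n", src[origElt]);   // 10
    return 0;
}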
if (!LegalOperations) return SDValue(); @@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa(EltNo)) { + if (ConstEltNo) { int Elt = cast(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast(InVec.getOperand(0)); } else if ((SVN = dyn_cast(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { DAG.getConstant(PtrOff, PtrType)); } - return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), Align); + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); + // Since we're explcitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(N); + return SDValue(N, 0); } return SDValue(); @@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. We do not handle sign-extend because we can't fill the sign + // using shuffles. + EVT SourceType = MVT::Other; + bool AllAnyExt = true; + bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + AllUndef = false; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort if the element is not an extension. + if (!ZeroExt && !AnyExt) { + SourceType = MVT::Other; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + // First time. + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple income types. Abort. + SourceType = MVT::Other; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + AllAnyExt &= AnyExt; + } + + if (AllUndef) + return DAG.getUNDEF(VT); + + // In order to have valid types, all of the inputs must be extended from the + // same source type and all of the inputs must be any or zero extend. + // Scalar sizes must be a power of two. + EVT OutScalarTy = N->getValueType(0).getScalarType(); + bool ValidTypes = SourceType != MVT::Other && + isPowerOf2_32(OutScalarTy.getSizeInBits()) && + isPowerOf2_32(SourceType.getSizeInBits()); + + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. 
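When every BUILD_VECTOR operand is a zero- or any-extend from one narrow source type, the code above rebuilds it as a wider BUILD_VECTOR over that source type (zero or undef filler) and bitcasts the result back; the slot each source element lands in is the same endian-dependent i*Ratio versus i*Ratio + (Ratio-1) computation used by the extract-and-truncate fold earlier in this patch. A standalone sketch of that layout, modelling a v4i32 built from zero-extended i8 values as a 16-byte array:

// extend_buildvector_layout.cpp -- illustration only, not DAG code.
#include <cstdio>

int main() {
    const unsigned NumElems = 4, ElemRatio = 4;        // i32 / i8
    unsigned char bytes[NumElems * ElemRatio] = {0};   // zero filler
    unsigned char narrow[NumElems] = {1, 2, 3, 4};     // pre-extension values

    bool isLittleEndian = true;                        // target byte order
    for (unsigned i = 0; i != NumElems; ++i) {
        unsigned Index = isLittleEndian ? i * ElemRatio
                                        : i * ElemRatio + (ElemRatio - 1);
        bytes[Index] = narrow[i];
    }

    // Reinterpreted as v4i32, every lane holds the zero-extended value.
    for (unsigned i = 0; i != NumElems; ++i) {
        unsigned lane = 0;
        for (unsigned b = 0; b != ElemRatio; ++b)
            lane |= (unsigned)bytes[i * ElemRatio + b]
                    << (isLittleEndian ? 8 * b : 8 * (ElemRatio - 1 - b));
        std::printf("lane %u = %u\n", i, lane);        // 1 2 3 4
    }
    return 0;
}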
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { + bool isLE = TLI.isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + SDValue VecIn1, VecIn2; for (unsigned i = 0; i != NumInScalars; ++i) { // Ignore undef inputs. @@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. + if ((VT != VecIn1.getValueType())) { + // We don't support shuffeling between TWO values of different types. 
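The BUILD_VECTOR-of-extracts path above now widens a single mismatched input of half the result width with an undef CONCAT_VECTORS (the XMM-to-YMM case on AVX) and gives up when two differently typed inputs are involved. The mask construction itself is unchanged in spirit: lanes taken from the first source keep their element index, lanes from the second source are offset by the element count, and undef operands become -1. A standalone sketch of building such a mask:

// buildvector_to_shuffle_mask.cpp -- illustration only; each result lane
// records which source vector and element it was extracted from.
#include <cstdio>
#include <vector>

struct Extract { int fromVec; int elt; };   // fromVec: 0, 1, or -1 for undef

int main() {
    const int NumInScalars = 4;
    // build_vector(extract(A,1), extract(A,0), undef, extract(B,2))
    std::vector<Extract> ops = { {0, 1}, {0, 0}, {-1, 0}, {1, 2} };

    std::vector<int> mask;
    for (int i = 0; i != NumInScalars; ++i) {
        if (ops[i].fromVec < 0)
            mask.push_back(-1);                          // undef lane
        else if (ops[i].fromVec == 0)
            mask.push_back(ops[i].elt);                  // first source
        else
            mask.push_back(ops[i].elt + NumInScalars);   // second source
    }

    for (int m : mask)
        std::printf("%d ", m);                           // 1 0 -1 6
    std::printf("\n");
    return 0;
}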
+ if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same type. + if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } @@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indicies are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - // - SDValue InsIdx = N->getOperand(1); - SDValue ExtIdx = V->getOperand(2); + // Only handle cases where both indexes are constants with the same type. + ConstantSDNode *InsIdx = dyn_cast(N->getOperand(1)); + ConstantSDNode *ExtIdx = dyn_cast(V->getOperand(2)); - if (InsIdx == ExtIdx) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) + return V->getOperand(1); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, + V->getOperand(0), N->getOperand(1)); + } } return SDValue(); @@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast(N); + + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 
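visitVECTOR_SHUFFLE above now performs the canonicalizations itself: shuffle(undef, undef) folds to undef, shuffle(v, v) is rewritten as shuffle(v, undef) by folding second-operand indices back into the first, shuffle(undef, v) commutes the operands by flipping every index across the element count, and indices that point into an undef right-hand side become -1. A standalone sketch of the three mask rewrites on plain integer masks (-1 meaning an undef lane):

// shuffle_mask_canon.cpp -- illustration only, not the SelectionDAG code.
#include <cstdio>
#include <vector>

// shuffle(v, v, mask) -> shuffle(v, undef, mask'): fold indices >= numElts.
std::vector<int> foldSameOperand(std::vector<int> mask, int numElts) {
    for (int &idx : mask)
        if (idx >= numElts) idx -= numElts;
    return mask;
}

// shuffle(undef, v, mask) -> shuffle(v, undef, mask'): commute every index.
std::vector<int> commuteMask(std::vector<int> mask, int numElts) {
    for (int &idx : mask)
        if (idx >= 0) idx = (idx < numElts) ? idx + numElts : idx - numElts;
    return mask;
}

// Right-hand side is undef: any index into it is itself undef.
std::vector<int> dropUndefRHS(std::vector<int> mask, int numElts) {
    for (int &idx : mask)
        if (idx >= numElts) idx = -1;
    return mask;
}

int main() {
    std::vector<int> mask = {0, 5, 2, 7};    // 4-wide shuffle of two vectors
    for (int m : foldSameOperand(mask, 4)) std::printf("%d ", m);  // 0 1 2 3
    std::printf("\n");
    for (int m : commuteMask(mask, 4))     std::printf("%d ", m);  // 4 1 6 3
    std::printf("\n");
    for (int m : dropUndefRHS(mask, 4))    std::printf("%d ", m);  // 0 -1 2 -1
    std::printf("\n");
    return 0;
}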
+ if (N0.getOpcode() == ISD::UNDEF) { + SmallVector NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + } // If it is a splat, check if the argument vector is another splat or a // build_vector with all scalar elements the same. - ShuffleVectorSDNode *SVN = cast(N); if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; } } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. + if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + return SDValue(); } @@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Elt = RHS.getOperand(i); if (!isa(Elt)) return SDValue(); - else if (cast(Elt)->isAllOnesValue()) + + if (cast(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast(Elt)->isNullValue()) Indices.push_back(NumElts); @@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } EVT VT = LHSOp.getValueType(); - assert(RHSOp.getValueType() == VT && - "SimplifyVBinOp with different BUILD_VECTOR element types"); + EVT RVT = RHSOp.getValueType(); + if (RVT != VT) { + // Integer BUILD_VECTOR operands may have types larger than the element + // size (e.g., when the element type is not legal). Prior to type + // legalization, the types may not match between the two BUILD_VECTORS. + // Truncate one of the operands to make them match. 
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) { + RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + } else { + LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + VT = RVT; + } + } SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && @@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || - (LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), @@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // FIXME: Discards pointer info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->getAlignment()); + LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), @@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, - false, Alignment); + false, false, Alignment); } } @@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - N0.getValueType().isInteger() && - N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); @@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// SDValue DAGCombiner::BuildSDIV(SDNode *N) { std::vector Built; - SDValue S = TLI.BuildSDIV(N, DAG, &Built); + SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); for (std::vector::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { /// SDValue DAGCombiner::BuildUDIV(SDNode *N) { std::vector Built; - SDValue S = TLI.BuildUDIV(N, DAG, &Built); + SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); for (std::vector::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - if (LoadSDNode *LD = dyn_cast(N)) { - Ptr = LD->getBasePtr(); - Size = LD->getMemoryVT().getSizeInBits() >> 3; - SrcValue = LD->getSrcValue(); - SrcValueOffset = LD->getSrcValueOffset(); - SrcValueAlign = LD->getOriginalAlignment(); - TBAAInfo = LD->getTBAAInfo(); - return true; - } - if (StoreSDNode *ST = dyn_cast(N)) { - Ptr = ST->getBasePtr(); - Size = ST->getMemoryVT().getSizeInBits() >> 3; - SrcValue = ST->getSrcValue(); - SrcValueOffset = ST->getSrcValueOffset(); - SrcValueAlign = ST->getOriginalAlignment(); - TBAAInfo = ST->getTBAAInfo(); - return false; - } - llvm_unreachable("FindAliasInfo expected a memory operand"); + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa(LS); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index e8f8c73d6883..0c1ac6982d2a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "isel" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -58,8 +59,15 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// @@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const { !hasTrivialKill(Cast->getOperand(0))) return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const GetElementPtrInst *GEP = dyn_cast(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && @@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return 0; } - // Look up the value to see if we already have a register for it. We - // cache values defined by Instructions across blocks, and other values - // only locally. This is because Instructions already have the SSA - // def-dominates-use requirement enforced. - DenseMap::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) - return I->second; - - unsigned Reg = LocalValueMap[V]; + // Look up the value to see if we already have a register for it. 
+ unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; @@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); @@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; DebugLoc OldDL = DL; @@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::SRA; } + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (ResultReg == 0) return false; @@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast(Idx)->getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); - // FIXME: This can be optimized by combining the add with a - // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - NIsKill = true; + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } } Ty = StTy->getElementType(Field); } else { @@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + // N = N + Offset + TotalOffs += TD.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
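Alongside the new GEP constant-offset coalescing, SelectBinaryOp above gains the classic strength reduction of an unsigned remainder by a power of two into a bitwise AND (the signed case is not eligible). A standalone sketch of the identity it relies on:

// urem_pow2.cpp -- illustration only; fast-isel applies the same rewrite to
// the immediate operand before emitting the instruction.
#include <cassert>
#include <cstdint>

static bool isPowerOfTwo(uint64_t v) { return v && (v & (v - 1)) == 0; }

static uint64_t uremLowered(uint64_t x, uint64_t imm) {
    if (isPowerOfTwo(imm))
        return x & (imm - 1);    // urem x, 2^k  ==  and x, 2^k - 1
    return x % imm;              // otherwise keep the real remainder
}

int main() {
    const uint64_t values[] = {0, 1, 7, 8, 1234567};
    for (uint64_t x : values)
        assert(uremLowered(x, 8) == x % 8);
    return 0;
}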
return false; NIsKill = true; - continue; + TotalOffs = 0; } // N = N + Idx * ElementSize; @@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { return false; } } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, N); @@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) { return true; } + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(Call); if (!DIVariable(DI->getVariable()).Verify() || - !FuncInfo.MF->getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } const Value *Address = DI->getAddress(); - if (!Address || isa(Address) || isa(Address)) + if (!Address || isa(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } unsigned Reg = 0; unsigned Offset = 0; @@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) - Reg = getRegForValue(Address); + Reg = lookUpRegForValue(Address); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa(Address) && + (!isa(Address) || + !FuncInfo.StaticAllocaMap.count(cast(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { @@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) { } return true; } - case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) - break; - - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(Call, ResultReg); - return true; - } - case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) - break; - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(Call); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(Call); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(Call, ResultReg); - - return true; - } case Intrinsic::objectsize: { ConstantInt *CI = cast(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { - TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) { DL = I->getDebugLoc(); + MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } + // Remove dead code. However, ignore call instructions since we've flushed + // the local value map and recomputed the insert point. + if (!isa(I)) { + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + } // Next, try calling the target to attempt to handle the instruction. 
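SelectInstruction above now works in two rounds: it records the insert point, tries the target-independent selector, and if that fails (possibly after emitting some instructions) removeDeadCode erases everything back to the saved point before the target-specific selector gets its turn; the new STATISTIC counters record which round succeeded. A standalone sketch of that save-point-and-rollback pattern, with a vector of strings standing in for the machine instruction stream:

// select_with_rollback.cpp -- illustration only; the container plays the
// role of the machine basic block being appended to.
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

using Selector = std::function<bool(std::vector<std::string> &)>;

static bool selectWithRollback(std::vector<std::string> &insts,
                               const std::vector<Selector> &selectors) {
    for (const Selector &trySelect : selectors) {
        const size_t savedPoint = insts.size();   // remember the insert point
        if (trySelect(insts))
            return true;                          // keep what was emitted
        insts.resize(savedPoint);                 // drop the dead instructions
    }
    return false;
}

int main() {
    std::vector<std::string> insts;
    Selector independent = [](std::vector<std::string> &out) {
        out.push_back("copy");                    // emits something, then fails
        return false;
    };
    Selector targetSpecific = [](std::vector<std::string> &out) {
        out.push_back("add");
        return true;
    };
    bool ok = selectWithRollback(insts, {independent, targetSpecific});
    std::printf("%d %zu %s\n", ok, insts.size(), insts[0].c_str()); // 1 1 add
    return 0;
}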
+ SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { + ++NumFastIselSuccessTarget; DL = DebugLoc(); return true; } + // Check for dead code and remove as necessary. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DL = DebugLoc(); return false; @@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, @@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b052740a1abe..8dde919079d9 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for @@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // candidate. I.e., it would trigger the creation of a stack protector. bool MayNeedSP = (AI->isArrayAllocation() || - (TySize > 8 && isa(Ty) && + (TySize >= 8 && isa(Ty) && cast(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// argument. This overrides previous frame index entry for this argument, /// if any. 
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, - int FI) { + int FI) { ByValArgFrameIndexMap[A] = FI; } @@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!"); + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return 0; } +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, @@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - SmallPtrSet Visited; - - // The 'eh.selector' call may not be in the direct successor of a basic block, - // but could be several successors deeper. If we don't find it, try going one - // level further. - while (Visited.insert(SuccBB)) { - for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast(I)) { - // Apply the catch info to LPad. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); -#ifndef NDEBUG - if (!FLI.MBBMap[SuccBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - return; - } - - const BranchInst *Br = dyn_cast(SuccBB->getTerminator()); - if (Br && Br->isUnconditional()) - SuccBB = Br->getSuccessor(0); - else - break; - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. 
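ComputeUsesVAFloatArgument, added above, walks the argument types of a variadic call with a post-order type iterator and sets the MachineModuleInfo flag as soon as any contained type is floating-point, so that the _fltused reference is emitted on Windows. A standalone sketch of the underlying "does this type contain a float anywhere?" walk, over a toy type representation rather than llvm::Type:

// contains_float.cpp -- illustration only; the Type struct below is a toy
// stand-in for llvm::Type and its contained types.
#include <cstdio>
#include <vector>

struct Type {
    bool isFloatingPoint = false;
    std::vector<const Type *> contained;   // struct fields, element types, ...
};

// Recursive walk; a struct { i32, { double } } reports true.
static bool containsFloat(const Type *T) {
    for (const Type *Sub : T->contained)
        if (containsFloat(Sub))
            return true;
    return T->isFloatingPoint;
}

int main() {
    Type dbl;   dbl.isFloatingPoint = true;
    Type i32;
    Type inner; inner.contained = {&dbl};
    Type outer; outer.contained = {&i32, &inner};

    std::printf("%d %d\n", containsFloat(&outer), containsFloat(&i32)); // 1 0
    return 0;
}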
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 2ff66f8f8715..1467d887789c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TII->getRegClass(*II, IIOpNum, TRI); - assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && + assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); @@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (RegisterMaskSDNode *RM = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags())); @@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { - unsigned SubIdx = cast(Op)->getZExtValue(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = + RegisterSDNode *R = dyn_cast(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (SRC && SRC != RC) { - MRI->setRegClass(NewVReg, SRC); - RC = SRC; + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } } AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, @@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - // The MachineInstr constructor adds implicit-def operands. Scan through - // these to determine which are dead. - if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { - // First, collect all used registers. - SmallVector UsedRegs; - for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) - if (F->getOpcode() == ISD::CopyFromReg) - UsedRegs.push_back(cast(F->getOperand(1))->getReg()); - else { - // Collect declared implicit uses. - const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); - // In addition to declared implicit uses, we must also check for - // direct RegisterSDNode operands. 
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - UsedRegs.push_back(Reg); - } - } - // Then mark unused registers as dead. - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); - } - // Add result register values for things that are defined by this // instruction. if (NumResults) @@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector UsedRegs; + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Glue value, for the benefit of targets still using - // Glue for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - MI->addRegisterDead(Reg, TRI); + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } - // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any glue outputs, we don't do this - // because we don't know what implicit defs are being used by glued nodes. - if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - if (const unsigned *IDList = II.getImplicitDefs()) { - for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); - i != e; ++i) - MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. 
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG @@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); - break; case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); - break; case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 63255ae2ebd9..a96a99781f4e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,47 +46,27 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; + + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet LegalizedNodes; + // Libcall insertion helpers. - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; - - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap LegalizedNodes; - - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. - if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); - - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } - public: explicit SelectionDAGLegalize(SelectionDAG &DAG); void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. + void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -105,10 +85,7 @@ class SelectionDAGLegalize { /// e.g. 
<0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl &Mask) const; - - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo); + ArrayRef Mask) const; void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,46 @@ class SelectionDAGLegalize { SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + void ForgetNode(SDNode *N) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } + virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N, this); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } }; } @@ -164,7 +177,7 @@ class SelectionDAGLegalize { SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl &Mask) const { + ArrayRef Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - // Finally, it's possible the root changed. Get the new root. - SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. 
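The for(;;) loop that follows implements exactly this: repeated reverse sweeps over the node list, legalizing anything not yet in LegalizedNodes, until a full sweep changes nothing. A simplified standalone model of that fixed-point iteration, assuming a plain std::list worklist and a std::set in the role of LegalizedNodes; the process function is illustrative, not an LLVM API:

#include <cstdio>
#include <list>
#include <set>

// Processing one item may append new items to the worklist, which is why a
// single pass is not enough and the driver iterates to a fixed point.
static void process(int Value, std::list<int> &Items) {
  if (Value % 2 == 0)           // pretend "legalizing" an even value
    Items.push_back(Value + 1); // produces a new item that also needs a visit
}

int main() {
  std::list<int> Items = {2, 4, 5};
  std::set<int> Seen; // plays the role of the LegalizedNodes set
  for (;;) {
    bool AnyProcessed = false;
    // Walk backwards so items appended during this sweep are picked up on a
    // later sweep, mirroring the reverse allnodes walk in LegalizeDAG().
    for (auto It = Items.end(); It != Items.begin();) {
      --It;
      if (Seen.insert(*It).second) {
        AnyProcessed = true;
        process(*It, Items);
      }
    }
    if (!AnyProcessed)
      break; // a full sweep legalized nothing: fixed point reached
  }
  std::printf("visited %zu distinct items\n", Seen.size());
  return 0;
}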
+ for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; - LegalizedNodes.clear(); + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } + } + if (!AnyLegalized) + break; + + } // Remove dead nodes now. DAG.RemoveDeadNodes(); } - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; - } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; - } - return 0; -} - -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: - break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; - } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. This avoids retraversing them exponential number of times. -/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. 
If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); - - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; - } - - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; -} - /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. -static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load one integer register's worth from the stack slot. SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Store it to the final location. Remember the store. 
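The ExpandUnalignedStore path in this hunk bounces the value through an aligned stack slot and then moves it to the real destination in register-sized integer pieces. A rough byte-level analogue in plain C++; storeUnaligned is a made-up helper, not an LLVM routine, and uint16_t chunks stand in for the "one integer register's worth" copies:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Store a double to a possibly misaligned address by first writing it to an
// aligned local slot, then copying that slot out one uint16_t piece at a time.
static void storeUnaligned(unsigned char *Dst, double Val) {
  unsigned char Slot[sizeof(double)]; // the aligned "stack slot"
  std::memcpy(Slot, &Val, sizeof(Val));
  for (std::size_t Off = 0; Off < sizeof(Val); Off += sizeof(uint16_t))
    std::memcpy(Dst + Off, Slot + Off, sizeof(uint16_t));
}

int main() {
  unsigned char Buf[16] = {0};
  storeUnaligned(Buf + 1, 3.5); // deliberately misaligned destination
  double Out;
  std::memcpy(&Out, Buf + 1, sizeof(Out));
  std::printf("%g\n", Out); // prints 3.5
  return 0;
}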
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. -static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, @@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. 
return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, + false, 0); } @@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: - case ISD::VAARG: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. 
- SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector Ops, ResultVals; + SmallVector Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + ReplacedNode(Node); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + ReplacedNode(Node); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); + llvm_unreachable("Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. - EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. 
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + LD->isInvariant(), LD->getAlignment()); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } - // Since loads produce two values, make sure to remember that we - // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? Tmp4 : Tmp3; + if (Tmp4.getNode() != Node) { + assert(Tmp3.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + ReplacedNode(Node); + } + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. 
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } - // If this is a promoted vector load, and the vector element types are - // legal, then scalarize it. - if (ExtType == ISD::EXTLOAD && SrcVT.isVector() && - TLI.isTypeLegal(Node->getValueType(0).getScalarType())) { - SmallVector LoadVals; - SmallVector LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; IdxgetValueType(0).getScalarType(), - Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); - SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, - Node->getValueType(0), &LoadVals[0], LoadVals.size()); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. 
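The EXTLOAD:i24 hunks here rebuild a 24-bit value from a 16-bit load and an 8-bit load joined with a shift and an OR, with the operand roles swapped for big-endian layouts. A little-endian sketch of the same reassembly in plain C++; load24le is a hypothetical helper and the code assumes a little-endian host for brevity:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Read a 24-bit little-endian quantity as one 16-bit load plus one 8-bit
// load, then join the halves: lo | (hi << 16), the same shift/OR shape the
// legalizer emits for a non-power-of-two extending load.
static uint32_t load24le(const unsigned char *P) {
  uint16_t Lo;
  std::memcpy(&Lo, P, sizeof(Lo)); // 16-bit piece
  uint8_t Hi = P[2];               // remaining 8-bit piece
  return (uint32_t)Lo | ((uint32_t)Hi << 16);
}

int main() {
  const unsigned char Bytes[3] = {0x78, 0x56, 0x12}; // 0x125678, little-endian
  std::printf("0x%06x\n", (unsigned)load24le(Bytes)); // prints 0x125678
  return 0;
}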
- break; - } - - // If this is a promoted vector load, and the vector element types are - // illegal, create the promoted vector from bitcasted segments. - if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) { - EVT MemElemTy = Node->getValueType(0).getScalarType(); - EVT SrcSclrTy = SrcVT.getScalarType(); - unsigned SizeRatio = - (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits()); - - SmallVector LoadVals; - SmallVector LoadChains; - unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; IdxgetPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - if (TLI.isBigEndian()) { - // MSB (which is garbage, comes first) - LoadVals.push_back(ScalarLoad.getValue(0)); - for (unsigned i = 0; igetValueType(0), ValRes); - - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes. - break; - - } + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); // FIXME: This does not work for vectors on most targets. Sign- and // zero-extend operations are currently folded into extending loads, @@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + if (Tmp2.getNode() != Node) { + assert(Tmp1.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + ReplacedNode(Node); + } + break; } case ISD::STORE: { StoreSDNode *ST = cast(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
+ Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + ReplaceNode(ST, OptStore); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - + Tmp3 = ST->getValue(); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + ReplaceNode(SDValue(Node, 0), Tmp1); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. 
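The truncating-store hunk above handles a non-power-of-two width by splitting the value into a power-of-two-sized low store and a smaller store of the shifted upper bits; the two stores are independent, which is why their order does not matter and they are only joined by a TokenFactor. A plain C++ sketch of the same split for a 24-bit store; store24le is a made-up helper and a little-endian host is assumed:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Store the low 24 bits of Val as a 16-bit store plus an 8-bit store of the
// bits above that, little-endian layout.
static void store24le(unsigned char *P, uint32_t Val) {
  uint16_t Lo = (uint16_t)(Val & 0xffff);        // low 16 bits
  uint8_t Hi = (uint8_t)((Val >> 16) & 0xff);    // remaining high 8 bits
  std::memcpy(P, &Lo, sizeof(Lo));               // 16-bit truncating store
  P[2] = Hi;                                     // 8-bit store at offset 2
}

int main() {
  unsigned char Buf[3];
  store24le(Buf, 0x125678);
  std::printf("%02x %02x %02x\n", (unsigned)Buf[0], (unsigned)Buf[1],
              (unsigned)Buf[2]); // prints 78 56 12
  return 0;
}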
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); } else { - if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || - Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + ReplaceNode(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG)); break; case TargetLowering::Expand: - - EVT WideScalarVT = Tmp3.getValueType().getScalarType(); - EVT NarrowScalarVT = StVT.getScalarType(); - - if (StVT.isVector()) { - unsigned NumElem = StVT.getVectorNumElements(); - // The type of the data we want to save - EVT RegVT = Tmp3.getValueType(); - EVT RegSclVT = RegVT.getScalarType(); - // The type of data as saved in memory. - EVT MemSclVT = StVT.getScalarType(); - - bool RegScalarLegal = TLI.isTypeLegal(RegSclVT); - bool MemScalarLegal = TLI.isTypeLegal(MemSclVT); - - // We need to expand this store. If the register element type - // is legal then we can scalarize the vector and use - // truncating stores. - if (RegScalarLegal) { - // Cast floats into integers - unsigned ScalarSize = MemSclVT.getSizeInBits(); - EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); - - // Store Stride in bytes - unsigned Stride = ScalarSize/8; - // Extract each of the elements from the original vector - // and save them into memory individually. - SmallVector Stores; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx)); - - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // This scalar TruncStore may be illegal, but we lehalize it - // later. - SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); - - Stores.push_back(Store); - } - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - // The scalar register type is illegal. - // For example saving <2 x i64> -> <2 x i32> on a x86. - // In here we bitcast the value into a vector of smaller parts and - // save it using smaller scalars. 
- if (!RegScalarLegal && MemScalarLegal) { - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits()/8; - - unsigned SizeRatio = - (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits()); - - EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), - MemSclVT, - SizeRatio * NumElem); - - // Cast the wide elem vector to wider vec with smaller elem type. - // Example <2 x i64> -> <4 x i32> - Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3); - - SmallVector Stores; - for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) { - // Extract the Ith element. - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx)); - // Bump pointer. - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(Stride)); - - // Store if, this element is: - // - First element on big endian, or - // - Last element on little endian - if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) || - ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) { - SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2, - ST->getPointerInfo().getWithOffset(Idx*Stride), - isVolatile, isNonTemporal, Alignment); - Stores.push_back(Store); - } - } - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - break; - } - - assert(false && "Unable to legalize the vector trunc store!"); - }// is vector - + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (Op.getValueType().isVector()) return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), @@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. 
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Move the sign bit to the top bit of the loaded integer. unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, DestAlign); + false, false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, @@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + false, false, false, 0); } @@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector CV; + SmallVector CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast(Node->getOperand(i))) { @@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); } if (!MoreThanTwoValues) { @@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. 
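ExpandFCOPYSIGN, touched earlier in this hunk, recovers the sign by reading the second operand's storage as an integer and isolating its top bit. A scalar sketch of copysign done the same bit-level way in plain C++, for 32-bit floats only; copysignBits is an illustrative name, not the library function:

#include <cstdint>
#include <cstdio>
#include <cstring>

// copysign(x, y) by bit manipulation: keep the magnitude bits of x and take
// the sign bit of y, the idea ExpandFCOPYSIGN implements with integer loads
// of the float's in-memory representation.
static float copysignBits(float X, float Y) {
  uint32_t XB, YB;
  std::memcpy(&XB, &X, sizeof(XB));
  std::memcpy(&YB, &Y, sizeof(YB));
  uint32_t R = (XB & 0x7fffffffu) | (YB & 0x80000000u);
  float Out;
  std::memcpy(&Out, &R, sizeof(Out));
  return Out;
}

int main() {
  std::printf("%g %g\n", copysignBits(3.0f, -1.0f), copysignBits(-2.5f, 1.0f));
  return 0; // prints -3 2.5
}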
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. - bool isTailCall = isInTailCallPosition(DAG, Node, TLI); + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + if (isTailCall) + InChain = TCChain; + std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2272,7 +1843,6 @@ std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo; } @@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); // Remainder is loaded back from the stack frame. 
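ExpandDivRemLibCall passes a stack slot for the remainder as an extra pointer argument and reads it back after the call, so a single libcall yields both the quotient and the remainder. A plain C++ sketch of that calling shape; my_divmod is a hypothetical helper standing in for the combined divide/modulo runtime entry point, not the actual compiler-rt symbol:

#include <cstdio>

// Combined divide/modulo helper: returns the quotient and writes the
// remainder through the extra out-pointer.
static int my_divmod(int A, int B, int *Rem) {
  *Rem = A % B;
  return A / B;
}

int main() {
  int Rem = 0;                       // plays the role of the stack slot
  int Quot = my_divmod(23, 5, &Rem); // one call produces both values
  std::printf("quot=%d rem=%d\n", Quot, Rem); // prints quot=4 rem=3
  return 0;
}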
- SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + MachinePointerInfo(), false, false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, 0); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported integer type!"); + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); @@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return Op; } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { // for now, we do this: // x = x | (x >> 1); @@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. 
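The CTLZ expansion above smears the highest set bit down and popcounts the complement; the CTTZ case that follows uses popcount(~x & (x - 1)), as its comment notes. A scalar sketch for 32-bit values, using GCC/Clang's __builtin_popcount purely for brevity:

#include <cstdint>

static unsigned popcount32(uint32_t x) { return __builtin_popcount(x); }

unsigned ctlz32(uint32_t x) {
  x |= x >> 1;  x |= x >> 2;  x |= x >> 4;   // smear the highest set bit downward
  x |= x >> 8;  x |= x >> 16;
  return popcount32(~x);                     // zeros above the highest set bit
}

unsigned cttz32(uint32_t x) {
  return popcount32(~x & (x - 1));           // ones strictly below the lowest set bit
}

Both functions also give the expected width (32) for a zero input, which is why the _ZERO_UNDEF variants can simply forward to them.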
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -2881,7 +2449,6 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -2959,14 +2526,16 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; @@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); @@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); @@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + MachinePointerInfo(V), + false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VS = cast(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), Node->getOperand(2), MachinePointerInfo(VS), - false, false, 0); + false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); @@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { - SmallVector Mask; - cast(Node)->getMask(Mask); + SmallVector NewMask; + ArrayRef Mask = 
cast(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (!TLI.isTypeLegal(EltVT)) - EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original. + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits()/NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } unsigned NumElems = VT.getVectorNumElements(); - SmallVector Ops; + SmallVector Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); @@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(0), + Op0, DAG.getIntPtrConstant(Idx))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(1), + Op1, DAG.getIntPtrConstant(Idx - NumElems))); } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } @@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. 
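The VECTOR_SHUFFLE expansion above re-expresses a shuffle over an illegal element type as a shuffle over more, narrower elements: each original mask entry i becomes factor consecutive entries i*factor .. i*factor+factor-1, and undef (negative) entries are simply replicated. A standalone sketch of that mask rewrite:

#include <vector>

// Rebuild a shuffle mask after splitting every element into `factor` pieces,
// e.g. a v4i64 mask becomes a v8i32 mask with factor == 2.  Negative entries
// mean "undef" and are replicated unchanged.
std::vector<int> widenShuffleMask(const std::vector<int> &Mask, unsigned factor) {
  std::vector<int> NewMask;
  for (int m : Mask)
    for (unsigned fi = 0; fi < factor; ++fi)
      NewMask.push_back(m < 0 ? m
                              : m * static_cast<int>(factor) + static_cast<int>(fi));
  return NewMask;
}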
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "Can't expand to unknown register!"); Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, Node->getValueType(0))); Results.push_back(Results[0].getValue(1)); break; } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && @@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + ReplaceNode(SDValue(Node, 0), Result); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. 
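The new SRA/SRL/SHL case above falls back to fully scalar code when the vector shift is not legal but the scalar element type is: every lane is extracted from both operands, shifted individually, and the results are reassembled into a BUILD_VECTOR. The same shape in plain C++, assuming equal-length inputs (logical right shift shown; the other opcodes only change the per-lane operator):

#include <cstdint>
#include <vector>

std::vector<uint32_t> lshr_per_lane(const std::vector<uint32_t> &Val,
                                    const std::vector<uint32_t> &Amt) {
  std::vector<uint32_t> Out;
  for (size_t i = 0; i < Val.size(); ++i)   // EXTRACT_VECTOR_ELT on both operands
    Out.push_back(Val[i] >> Amt[i]);        // scalar SRL per lane
  return Out;                               // BUILD_VECTOR of the scalars
}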
+ if (!Results.empty()) + ReplaceNode(Node, Results.data()); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - // Perform the larger operation. + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { - //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + // FIXME: This should set a bit in the zero extended value instead. Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ) { + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::VECTOR_SHUFFLE: { - SmallVector Mask; - cast(Node)->getMask(Mask); + ArrayRef Mask = cast(Node)->getMask(); // Cast the two input vectors. 
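The CTLZ/CTTZ/CTPOP promotion above performs the count in a wider legal type and then repairs the result: CTPOP (and CTTZ_ZERO_UNDEF) needs nothing, CTLZ subtracts the extra leading bits of the wide type, and CTTZ replaces a wide-width answer with the original width. A sketch for an i8 promoted to i32, borrowing __builtin_clz/__builtin_ctz for brevity (both are undefined for zero, hence the explicit checks):

#include <cstdint>

unsigned ctlz8_via_i32(uint8_t x) {
  if (x == 0) return 8;
  // Zero-extend, count in 32 bits, subtract the 24 extra leading bits.
  return __builtin_clz(static_cast<uint32_t>(x)) - (32 - 8);
}

unsigned cttz8_via_i32(uint8_t x) {
  if (x == 0) return 8;                 // the SELECT in the hunk handles this case
  return __builtin_ctz(static_cast<uint32_t>(x));
}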
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); @@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FDIV: + case ISD::FREM: + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } // SelectionDAG::Legalize - This is the entry point for the file. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7c1cc69d6a2f..e3938968b205 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, - L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, case ISD::SETUEQ: LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; break; - default: assert(false && "Do not know how to soften this setcc!"); + default: llvm_unreachable("Do not know how to soften this setcc!"); } } @@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, switch (SrcVT.getSimpleVT().SimpleTy) { default: - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); case MVT::i32: Parts = TwoE32; break; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a5c4c2ded4c5..95ddb1e0f6fb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,7 +20,6 @@ #include "LegalizeTypes.h" #include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; @@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: - if (NOutVT.bitsEq(NInVT)) + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; @@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case TargetLowering::TypeWidenVector: - if (OutVT.bitsEq(NInVT)) - // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, DAG.getConstant(NVT.getSizeInBits() - @@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // The count is the same in the promoted type except if the original - // value was zero. This can be handled by setting the bit just off - // the top of the original type. 
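PromoteIntRes_CTTZ keeps the trailing-zero count correct for a zero input without a select: ORing in "the bit just off the top of the original type" guarantees the promoted value has a set bit at position OVT-bits, so a zero i8 still yields 8. The hunk below merely moves that fixup under an if, since CTTZ_ZERO_UNDEF may skip it. A scalar sketch of the trick:

#include <cstdint>

unsigned cttz8_promoted(uint8_t x) {
  // Set the bit just above the original 8-bit width so a zero input produces
  // 8 rather than an undefined wide-width count.
  uint32_t wide = static_cast<uint32_t>(x) | (1u << 8);
  return __builtin_ctz(wide);
}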
- APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); - return DAG.getNode(ISD::CTTZ, dl, NVT, Op); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { - SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), @@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; @@ -1171,7 +1179,6 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } -#if 0 - // FIXME: This code is broken for shifts with a zero amount! // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. if ((KnownZero & HighBitMask) == HighBitMask) { - // Compute 32-amt. - SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, - DAG.getConstant(NVTBits, ShTy), - Amt); + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. 
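The re-enabled fast path above, together with the code that follows, builds a double-width shift from two narrow shifts once the high bits of the amount are known to be zero (Amt < 32 for an i64 split into i32 halves). Shifting the carried-over bits first by one and then by Amt ^ 31 avoids an undefined shift by 32 when Amt is zero. A scalar sketch of the SHL case:

#include <cstdint>

// Shift a 64-bit value held in two 32-bit halves left by Amt, where Amt is
// known to be < 32.  Amt ^ 31 == 31 - Amt, so no individual shift is ever 32.
void shl64_known_small(uint32_t InL, uint32_t InH, unsigned Amt,
                       uint32_t &Lo, uint32_t &Hi) {
  unsigned Amt2  = Amt ^ 31;             // 31 - Amt
  uint32_t carry = (InL >> 1) >> Amt2;   // bits of InL that move into the high half
  Lo = InL << Amt;
  Hi = (InH << Amt) | carry;
}

For SRL and SRA the same two-step carry shift is used with the halves (and the final results) swapped, as the added std::swap calls show.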
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + unsigned Op1, Op2; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; } - Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); - Hi = DAG.getNode(ISD::OR, NVT, - DAG.getNode(Op1, NVT, InH, Amt), - DAG.getNode(Op2, NVT, InL, Amt2)); + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); return true; } -#endif return false; } @@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } - - return false; } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, @@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, @@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, @@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
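ExpandIntRes_CTLZ/CTTZ above split the count across the two halves of an expanded integer: if the half nearest the counted end is non-zero, its count is the whole answer (zero is impossible there, so the _ZERO_UNDEF form is safe for the other half), otherwise count the remaining half and add the half-width. A 64-bit sketch over 32-bit halves:

#include <cstdint>

static unsigned ctlz32(uint32_t x) { return x ? __builtin_clz(x) : 32; }
static unsigned cttz32(uint32_t x) { return x ? __builtin_ctz(x) : 32; }

unsigned ctlz64(uint32_t Lo, uint32_t Hi) {
  return Hi != 0 ? ctlz32(Hi)            // high half decides when non-zero
                 : ctlz32(Lo) + 32;      // all 32 high bits are zero
}

unsigned cttz64(uint32_t Lo, uint32_t Hi) {
  return Lo != 0 ? cttz32(Lo)
                 : cttz32(Hi) + 32;
}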
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); std::pair CallInfo = TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - true, Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); @@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { else if (SrcVT == MVT::i128) FF = APInt(32, F32TwoE128); else - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); // Check whether the sign bit is set. SDValue Lo, Hi; @@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(1); - assert(Op0.getValueType() == Op1.getValueType() && - "Invalid input vector types"); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); - unsigned NumElem0 = Op0.getValueType().getVectorNumElements(); - unsigned NumElem1 = Op1.getValueType().getVectorNumElements(); + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); unsigned NumOutElem = NOutVT.getVectorNumElements(); - assert(NumElem0 + NumElem1 == NumOutElem && - "Invalid number of incoming elements"); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); // Take the elements from the first vector. 
SmallVector Ops(NumOutElem); - for (unsigned i = 0; i < NumElem0; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op0.getValueType().getScalarType(), Op0, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); - } - - // Take the elements from the second vector - for (unsigned i = 0; i < NumElem1; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op1.getValueType().getScalarType(), Op1, - DAG.getIntPtrConstant(i)); - Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InElemTy, Op, DAG.getIntPtrConstant(j)); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } } return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577433cc..439aa4de5cf5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() { for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, @@ -275,8 +273,6 @@ bool DAGTypeLegalizer::run() { EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results @@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. + assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; } @@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
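PromoteIntRes_CONCAT_VECTORS above now walks every operand rather than assuming exactly two, extracting each element and ANY_EXTENDing it into the promoted element type before rebuilding the result vector. The same iteration in ordinary C++, widening u8 lanes to u16 as an example:

#include <cstdint>
#include <vector>

std::vector<uint16_t>
concatAndPromote(const std::vector<std::vector<uint8_t>> &Operands) {
  std::vector<uint16_t> Out;
  for (const auto &Op : Operands)                  // each concatenated operand
    for (uint8_t Elt : Op)                         // EXTRACT_VECTOR_ELT
      Out.push_back(static_cast<uint16_t>(Elt));   // ANY_EXTEND into the wider lane
  return Out;                                      // BUILD_VECTOR
}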
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index abacdac686bc..e8664458e9a6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); @@ -633,6 +634,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 8e7e4985e4d0..a8ff7c65abde 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -21,7 +21,6 @@ #include "LegalizeTypes.h" #include "llvm/Target/TargetData.h" -#include "llvm/CodeGen/PseudoSourceValue.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; @@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), false, - false, MinAlign(Alignment, IncrementSize)); + false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, + isVolatile, isNonTemporal, isInvariant, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f815b00db5d6..3ae8345bd198 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,8 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandLoad(SDValue Op); + SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + return TranslateLegalizeResults(Op, Result); + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ST->getValue().getValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + switch (TLI.getTruncStoreAction(ValVT, StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + Changed = true; + return LegalizeOp(TLI.LowerOperation(Result, DAG)); + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandStore(Op)); + } + } + bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); J != E; @@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: - case ISD::CTTZ: case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: @@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } + +SDValue VectorLegalizer::ExpandLoad(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + LoadSDNode *LD = cast(Op.getNode()); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SmallVector LoadVals; + SmallVector LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; IdxgetValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), 
BasePTR, + DAG.getIntPtrConstant(Stride)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &LoadChains[0], LoadChains.size()); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + + AddLegalizedOperand(Op.getValue(0), Value); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return (Op.getResNo() ? NewChain : Value); +} + +SDValue VectorLegalizer::ExpandStore(SDValue Op) { + DebugLoc dl = Op.getDebugLoc(); + StoreSDNode *ST = cast(Op.getNode()); + SDValue Chain = ST->getChain(); + SDValue BasePTR = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + unsigned NumElem = StVT.getVectorNumElements(); + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + // Cast floats into integers + unsigned ScalarSize = MemSclVT.getSizeInBits(); + + // Round odd types to the next pow of two. + if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + + // Store Stride in bytes + unsigned Stride = ScalarSize/8; + // Extract each of the elements from the original vector + // and save them into memory individually. + SmallVector Stores; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + + // This scalar TruncStore may be illegal, but we legalize it later. + SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, + ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, + isVolatile, isNonTemporal, Alignment); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Stores.push_back(Store); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + AddLegalizedOperand(Op, TF); + return TF; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. - if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::XOR, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() && "Invalid mask size"); @@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { DebugLoc DL = Op.getDebugLoc(); // Make sure that the SINT_TO_FP and SRL instructions are available. 
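VectorLegalizer::ExpandStore above turns one vector truncating store into a loop of scalar truncating stores: each lane is extracted, narrowed to the memory element type, and written at Idx * Stride from the base pointer, with the stride rounded up to a power of two for odd-sized elements. A byte-level sketch, truncating u32 lanes to u16 in memory:

#include <cstdint>
#include <cstring>
#include <vector>

void storeTrunc_u32_to_u16(uint8_t *Base, const std::vector<uint32_t> &Val) {
  const unsigned Stride = sizeof(uint16_t);             // memory element size in bytes
  for (size_t Idx = 0; Idx < Val.size(); ++Idx) {
    uint16_t Narrow = static_cast<uint16_t>(Val[Idx]);  // per-lane truncation
    std::memcpy(Base + Idx * Stride, &Narrow, Stride);  // scalar store at Idx*Stride
  }
}

ExpandLoad is the mirror image: one scalar (extending) load per lane at the same strides, combined with a BUILD_VECTOR and a TokenFactor over the load chains.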
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); EVT SVT = VT.getScalarType(); assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 107a42b2951c..5f23f01dafb4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. 
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; switch (N->getOpcode()) { default: @@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: - case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Lo part from the stack slot. Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, MinAlign(Alignment, IncrementSize)); + false, false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - Alignment); + isInvariant, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // Split the input. + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. 
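The VSELECT scalarization above has to reconcile two boolean conventions: vector lanes that are 0 or all-ones versus scalars that are 0 or 1. Masking with 1 handles the first direction; sign-extending the low bit handles the second. In scalar terms:

#include <cstdint>

// All-ones/zero lane -> 0/1 scalar (the AND-with-1 case above).
uint32_t laneToBit(uint32_t lane) { return lane & 1; }

// 0/1 scalar -> 0/all-ones lane (the SIGN_EXTEND_INREG-from-i1 case above).
uint32_t bitToLane(uint32_t bit) {
  return static_cast<uint32_t>(-static_cast<int32_t>(bit & 1));
}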
EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case TargetLowering::TypeLegal: { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), Lo, Hi); + } else { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypePromoteInteger: { - SDValue InOp = GetPromotedInteger(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), - InOp.getValueType().getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypeSplitVector: - GetSplitVector(N->getOperand(0), Lo, Hi); - break; - case TargetLowering::TypeWidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } } if (N->getOpcode() == ISD::FP_ROUND) { @@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; @@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. 
InOp = GetPromotedInteger(InOp); @@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, Cond1, InOp1, InOp2); } @@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { MVT::Other,&StChain[0],StChain.size()); } +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern ? Should we zero the unused lanes if this is a float compare ? + + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + + return PromoteTargetBoolean(CC, N->getValueType(0)); +} + + //===----------------------------------------------------------------------===// // Vector Widening Utilities //===----------------------------------------------------------------------===// @@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, isInvariant, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), 
isVolatile, - isNonTemporal, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + isNonTemporal, isInvariant, + MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 000000000000..ff0136e08cd9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,657 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +static cl::opt DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : + Picker(this), + InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) +{ + TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); + TLI = &IS->getTargetLowering(); + + const TargetMachine &tm = (*IS->MF).getTarget(); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + // This hard requirment could be relaxed, but for now + // do not let it procede. 
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. 
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + Queue.push_back(SU); +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getNode()) + return false; + + // If this is a compound instruction, + // it is likely to be a call. Do not delay it. + if (SU->getNode()->getGluedNode()) + return true; + + // First see if the pipeline could receive this instruction + // in the current cycle. + if (SU->getNode()->isMachineOpcode()) + switch (SU->getNode()->getMachineOpcode()) { + default: + if (!ResourcesModel->canReserveResources(&TII->get( + SU->getNode()->getMachineOpcode()))) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + + // Now see if there are no other dependencies + // to instructions alredy in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignor order deps. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + + return true; +} + +/// Keep track of available resources. +void ResourcePriorityQueue::reserveResources(SUnit *SU) { + // If this SU does not fit in the packet + // start a new one. 
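The resource_sort comparator above is only reached when -disable-dfa-sched is in effect; it orders the ready list by forced priority, then critical-path latency, then the number of nodes a candidate solely blocks, with the node number as a stable tie-breaker. A standalone sketch of that ordering (plain C++; the Node fields are illustrative stand-ins for values the queue computes, and the comparator is written in pick-first form, whereas the queue's operator() returns true for the lower-priority element):

    #include <algorithm>
    #include <vector>

    struct Node {
      bool     ScheduleHigh; // wraparound deps: schedule as early as possible
      unsigned Latency;      // critical-path latency estimate
      unsigned Blocked;      // nodes for which this is the sole unscheduled pred
      unsigned Num;          // stable final tie-breaker
    };

    // Returns true if A should be picked before B.
    static bool pickBefore(const Node &A, const Node &B) {
      if (A.ScheduleHigh != B.ScheduleHigh) return A.ScheduleHigh;
      if (A.Latency != B.Latency)           return A.Latency > B.Latency;
      if (A.Blocked != B.Blocked)           return A.Blocked > B.Blocked;
      return A.Num < B.Num;                 // deterministic ordering
    }

    int main() {
      std::vector<Node> Ready = {{false, 3, 1, 0}, {true, 1, 0, 1}, {false, 3, 2, 2}};
      std::sort(Ready.begin(), Ready.end(), pickBefore); // best candidate first
      return 0;
    }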
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { + ResourcesModel->clearResources(); + Packet.clear(); + } + + if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + switch (SU->getNode()->getMachineOpcode()) { + default: + ResourcesModel->reserveResources(&TII->get( + SU->getNode()->getMachineOpcode())); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + Packet.push_back(SU); + } + // Forcefully end packet for PseudoOps. + else { + ResourcesModel->clearResources(); + Packet.clear(); + } + + // If packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= InstrItins->IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + +signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + // Gen estimate. + for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { + EVT VT = SU->getNode()->getValueType(i); + if (TLI->isTypeLegal(VT) + && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance += numberRCValSuccInSU(SU, RCId); + } + // Kill estimate. + for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { + const SDValue &Op = SU->getNode()->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (isa(Op.getNode())) + continue; + + if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance -= numberRCValPredInSU(SU, RCId); + } + return RegBalance; +} + +/// Estimates change in reg pressure from this SU. +/// It is acheived by trivial tracking of defined +/// and used vregs in dependent instructions. +/// The RawPressure flag makes this function to ignore +/// existing reg file sizes, and report raw def/use +/// balance. +signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + if (RawPressure) { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + else { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + if ((RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) > 0) && + (RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + + return RegBalance; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 100; +static const unsigned PriorityThree = 50; +static const unsigned PriorityFour = 15; +static const unsigned PriorityFive = 5; +static const unsigned ScaleOne = 20; +static const unsigned ScaleTwo = 10; +static const unsigned ScaleThree = 5; +static const unsigned FactorOne = 2; + +/// Returns single number reflecting benefit of scheduling SU +/// in the current cycle. +signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { + // Initial trivial priority. 
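reserveResources() above treats the DFA purely as a packet former: an instruction that does not fit (or a glued call) closes the current packet, and a packet that reaches the issue width is discarded so the next cycle starts fresh. A standalone sketch of that bookkeeping (plain C++; UnitBudget stands in for the DFA resource query and IssueWidth for InstrItins->IssueWidth, so the names and the unit counting are illustrative):

    #include <vector>

    struct Instr { unsigned Units; }; // resource units the instruction needs

    class PacketModel {
      std::vector<Instr> Packet;
      unsigned UsedUnits = 0;
      const unsigned UnitBudget;   // stand-in for the DFA's resource table
      const unsigned IssueWidth;   // stand-in for the itinerary's issue width
    public:
      PacketModel(unsigned Budget, unsigned Width)
          : UnitBudget(Budget), IssueWidth(Width) {}

      void reserve(const Instr &I) {
        // If the instruction does not fit in the current packet, start a new one.
        if (UsedUnits + I.Units > UnitBudget) {
          Packet.clear();
          UsedUnits = 0;
        }
        Packet.push_back(I);
        UsedUnits += I.Units;
        // If the packet is now full, reset so the next cycle starts fresh.
        if (Packet.size() >= IssueWidth) {
          Packet.clear();
          UsedUnits = 0;
        }
      }
    };

    int main() {
      PacketModel M(/*Budget=*/6, /*Width=*/4);
      for (unsigned u = 1; u <= 8; ++u)
        M.reserve(Instr{u % 3 + 1}); // packets form and reset as the budget fills
      return 0;
    }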
+ signed ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Adaptable scheduling + // A small, but very parallel + // region, where reg pressure is an issue. + if (HorizontalVerticalBalance > RegPressureThreshold) { + // Critical path first + ResCount += (SU->getHeight() * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + // Consider change to reg pressure from scheduling + // this SU. + ResCount -= (regPressureDelta(SU,true) * ScaleOne); + } + // Default heuristic, greeady and + // critical path driven. + else { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // Now see how many instructions is blocked by this SU. + ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + ResCount -= (regPressureDelta(SU) * ScaleTwo); + } + + // These are platform specific things. + // Will need to go into the back end + // and accessed from here via a hook. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.isCall()) + ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + } + else + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + ResCount += PriorityFive; + break; + + case ISD::INLINEASM: + ResCount += PriorityFour; + break; + } + } + return ResCount; +} + + +/// Main resource tracking point. +void ResourcePriorityQueue::scheduledNode(SUnit *SU) { + // Use NULL entry as an event marker to reset + // the DFA state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + return; + } + + const SDNode *ScegN = SU->getNode(); + // Update reg pressure tracking. + // First update current node. + if (ScegN->isMachineOpcode()) { + // Estimate generated regs. + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) + RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); + } + } + // Estimate killed regs. + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) { + if (RegPressure[RC->getID()] > + (numberRCValPredInSU(SU, RC->getID()))) + RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); + else RegPressure[RC->getID()] = 0; + } + } + } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + continue; + --I->getSUnit()->NumRegDefsLeft; + } + } + + // Reserve resources for this SU. + reserveResources(SU); + + // Adjust number of parallel live ranges. + // Heuristic is simple - node with no data successors reduces + // number of live ranges. All others, increase it. 
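SUSchedulingCost() above blends the classic list-scheduling signals into one number: critical-path height and solely-blocked successors raise the score, fitting into the current packet scales it up by a power of two, projected register-pressure growth lowers it, and forced-high nodes and calls get flat bonuses. A standalone sketch of that shape (plain C++; the inputs are passed in directly here instead of being derived from the SUnit and the DFA, and only a subset of the constants above is mirrored):

    static int schedulingCost(bool ScheduleHigh, unsigned Height, unsigned Blocked,
                              bool ResourcesAvailable, int PressureDelta,
                              int CallBonus) {
      const int PriorityOne = 200, ScaleTwo = 10, FactorOne = 2;

      int Cost = 1;                                 // trivial base priority
      if (ScheduleHigh)
        Cost += PriorityOne;                        // forced-high nodes go first
      Cost += static_cast<int>(Height) * ScaleTwo;  // critical path first
      Cost += static_cast<int>(Blocked) * ScaleTwo; // unblock as much as possible
      if (ResourcesAvailable)
        Cost <<= FactorOne;                         // fits the packet: boost it
      Cost -= PressureDelta * ScaleTwo;             // penalize pressure growth
      Cost += CallBonus;                            // opcode-specific bonuses
      return Cost;
    }

    int main() {
      // A tall, resource-fitting node beats a short one that grows pressure.
      int A = schedulingCost(false, /*Height=*/8, /*Blocked=*/2, true,  /*PD=*/0, 0);
      int B = schedulingCost(false, /*Height=*/3, /*Blocked=*/0, false, /*PD=*/4, 0);
      return A > B ? 0 : 1;
    }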
+ unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns next instructions +/// to be placed in scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return 0; + + std::vector::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector::iterator I = Queue.begin(), + E = Queue.end(); I != E; ++I) { + if (*I == *Best) + continue; + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use default TD scheduling mechanism. 
+ else { + for (std::vector::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} + + +#ifdef NDEBUG +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { + ResourcePriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b275c6321ae4..24da432a47a1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -43,7 +43,7 @@ namespace { SmallVector Queue; bool empty() const { return Queue.empty(); } - + void push(SUnit *U) { Queue.push_back(U); } @@ -101,8 +101,8 @@ class ScheduleDAGFast : public ScheduleDAGSDNodes { bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); void ListScheduleBottomUp(); - /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool ForceUnitLatencies() const { return true; } + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. @@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. 
if (!LiveRegDefs[I->getReg()]) { @@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), SDValue(LoadNode, 1)); - SUnit *NewSU = NewSUnit(N); + SUnit *NewSU = newSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { @@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { LoadSU = &SUnits[LoadNode->getNodeId()]; isNewLoad = false; } else { - LoadSU = NewSUnit(LoadNode); + LoadSU = newSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); } @@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } if (isNewLoad) { AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); } @@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector &Copies) { - SUnit *CopyFromSU = NewSUnit(static_cast(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = NewSUnit(static_cast(NULL)); + SUnit *CopyToSU = newSUnit(static_cast(NULL)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, Added = true; } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { if (RegAdded.insert(*Alias)) { LRegs.push_back(*Alias); @@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e757defd3895..2cb5d37d689e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -44,10 +44,6 @@ static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); -static RegisterScheduler - tdrListrDAGScheduler("list-tdrr", - "Top-down register reduction list scheduling", - createTDRRListDAGScheduler); static RegisterScheduler sourceListDAGScheduler("source", 
"Similar to list-burr but schedules in source " @@ -93,6 +89,9 @@ static cl::opt DisableSchedCriticalPath( static cl::opt DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); static cl::opt MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), @@ -103,17 +102,6 @@ static cl::opt AvgIPC( "sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists.")); -#ifndef NDEBUG -namespace { - // For sched=list-ilp, Count the number of times each factor comes into play. - enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, - FactStatic, FactOther, NumFactors }; -} -static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; -static int FactorCount[NumFactors]; -#endif //!NDEBUG - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -121,10 +109,6 @@ namespace { /// class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// NeedLatency - True if the scheduler will make use of latency information. /// bool NeedLatency; @@ -162,11 +146,15 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { /// and similar queries. ScheduleDAGTopologicalSort Topo; + // Hack to keep track of the inverse of FindCallSeqStart without more crazy + // DAG crawling. + DenseMap CallSeqEndForStart; + public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) - : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { @@ -221,8 +209,6 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); - void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); - void ReleaseSuccessors(SUnit *SU); void ReleasePending(); void AdvanceToCycle(unsigned NextCycle); void AdvancePastStalls(SUnit *SU); @@ -242,15 +228,11 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); - void ScheduleNodeTopDown(SUnit*); - void ListScheduleTopDown(); - - /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); - SUnit *NewNode = NewSUnit(N); + SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) Topo.InitDAGTopologicalSorting(); @@ -268,9 +250,9 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { return NewNode; } - /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. 
- bool ForceUnitLatencies() const { + bool forceUnitLatencies() const { return !NeedLatency; } }; @@ -278,7 +260,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { /// GetCostForDef - Looks up the register class and cost for a given definition. /// Typically this just means looking up the representative register class, -/// but for untyped values (MVT::untyped) it means inspecting the node's +/// but for untyped values (MVT::Untyped) it means inspecting the node's /// opcode to determine what register class is being generated. static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, @@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); unsigned Opcode = Node->getMachineOpcode(); @@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - FactorCount[i] = 0; - } -#endif //!NDEBUG CurCycle = 0; IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegGens.resize(TRI->getNumRegs(), NULL); + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + CallSeqEndForStart.clear(); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() { HazardRec->Reset(); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + // Execute the actual scheduling loop. + ListScheduleBottomUp(); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); - } -#endif // !NDEBUG AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; - if (!ForceUnitLatencies()) { + if (!forceUnitLatencies()) { // Updating predecessor's height. This is now the cycle when the // predecessor can be scheduled without causing a pipeline stall. PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); @@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, + unsigned NestLevel, + const TargetInstrInfo *TII) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. 
+ if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII)) + return true; + return false; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + if (NestLevel == 0) + return false; + --NestLevel; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNested are used in recursion to indcate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NestLevel != 0); + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding + // CALLSEQ_BEGIN. Inject an artificial physical register dependence between + // these nodes, to prevent other calls from being interscheduled with them. 
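IsChainDependent() and FindCallSeqStart() above walk chain operands backwards and match a CALLSEQ_END to its CALLSEQ_BEGIN by nesting level, the same way one matches nested brackets. A standalone sketch of that matching over a flat token sequence (plain C++; the Kind enum and the findCallSeqStart name are illustrative, and the TokenFactor branching of the real walk is left out):

    #include <cstdio>
    #include <vector>

    enum Kind { Begin, End, Other };

    // Walk backwards from the END at position Pos and return the index of the
    // matching BEGIN, or -1 if there is none.
    static int findCallSeqStart(const std::vector<Kind> &Chain, int Pos) {
      unsigned NestLevel = 0;
      for (int i = Pos; i >= 0; --i) {
        if (Chain[i] == End)            // another call sequence ends: go deeper
          ++NestLevel;
        else if (Chain[i] == Begin) {   // a call sequence begins: come back up
          if (NestLevel == 0)
            return -1;                  // unbalanced
          if (--NestLevel == 0)
            return i;                   // this BEGIN matches our END
        }
      }
      return -1;
    }

    int main() {
      // BEGIN .. BEGIN .. END .. END : the outer END (index 5) matches index 0.
      std::vector<Kind> Chain = {Begin, Other, Begin, End, Other, End};
      std::printf("matching BEGIN at %d\n", findCallSeqStart(Chain, 5));
      return 0;
    }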
+ unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - unsigned ReadyCycle = - isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + unsigned ReadyCycle = PendingQueue[i]->getHeight(); if (ReadyCycle < MinAvailableCycle) MinAvailableCycle = ReadyCycle; @@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { } else { for (; CurCycle != NextCycle; ++CurCycle) { - if (isBottomUp) - HazardRec->RecedeCycle(); - else - HazardRec->AdvanceCycle(); + HazardRec->RecedeCycle(); } } // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the @@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // currently need to treat these nodes like real instructions. // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; - unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + unsigned ReadyCycle = SU->getHeight(); // Bump CurCycle to account for latency. We assume the latency of other // available instructions may be hidden by the stall (not a full pipe stall). @@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // Calls are scheduled in their preceding cycle, so don't conflict with // hazards from instructions after the call. EmitNode will reset the // scoreboard state before emitting the call. - if (isBottomUp && SU->isCall) + if (SU->isCall) return; // FIXME: For resource conflicts in very long non-pipelined stages, we @@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { int Stalls = 0; while (true) { ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + HazardRec->getHazardType(SU, -Stalls); if (HT == ScheduleHazardRecognizer::NoHazard) break; @@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { HazardRec->Reset(); return; } - if (isBottomUp && SU->isCall) { + if (SU->isCall) { // Calls are scheduled with their preceding instructions. For bottom-up // scheduling, clear the pipeline state before emitting. HazardRec->Reset(); } HazardRec->EmitInstruction(SU); - - if (!isBottomUp && SU->isCall) { - HazardRec->Reset(); - } } static void resetVRegCycle(SUnit *SU); @@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { Sequence.push_back(SU); - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then // advance CurCycle before ReleasePredecessors to avoid useless pushes to @@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. 
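The LiveRegDefs/LiveRegGens vectors are resized to getNumRegs()+1 so the call sequence itself can be tracked as one extra pseudo-register: scheduling a lowered CALLSEQ_END marks the slot live with the matching CALLSEQ_BEGIN as its def, and scheduling that CALLSEQ_BEGIN releases it, which keeps two calls from being interleaved. A standalone sketch of that bookkeeping (plain C++; CallSeqTracker, SUnitStub and the method names are illustrative, not the scheduler's API):

    #include <cassert>
    #include <vector>

    struct SUnitStub { unsigned Num; };

    class CallSeqTracker {
      std::vector<const SUnitStub *> LiveRegDefs; // NumRegs physregs + 1 pseudo slot
      unsigned CallResource;                      // index of the pseudo slot
    public:
      explicit CallSeqTracker(unsigned NumRegs)
          : LiveRegDefs(NumRegs + 1, nullptr), CallResource(NumRegs) {}

      // Seeing a CALLSEQ_END (bottom-up): mark the whole call sequence live,
      // with the matching CALLSEQ_BEGIN as the "def" of the pseudo resource.
      void beginCallSequence(const SUnitStub *CallSeqBegin) {
        assert(!LiveRegDefs[CallResource] && "call already in flight");
        LiveRegDefs[CallResource] = CallSeqBegin;
      }

      // A node that is itself a CALLSEQ_END must be delayed while another call
      // sequence holds the pseudo resource, exactly like a live physreg clash.
      bool mustDelayCall() const { return LiveRegDefs[CallResource] != nullptr; }

      // Scheduling the CALLSEQ_BEGIN releases the pseudo resource again.
      void endCallSequence() { LiveRegDefs[CallResource] = nullptr; }
    };

    int main() {
      CallSeqTracker T(/*NumRegs=*/8);
      SUnitStub Begin{0};
      T.beginCallSequence(&Begin);
      bool Delay = T.mustDelayCall(); // true: a second call may not be interleaved
      T.endCallSequence();
      (void)Delay;
      return 0;
    }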
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } resetVRegCycle(SU); @@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = CallSeqEndForStart[SU]; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. + if (LiveRegGens[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) + ++NumLiveRegs; // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. LiveRegDefs[I->getReg()] = SU; - if (!LiveRegDefs[I->getReg()]) { - ++NumLiveRegs; - } if (LiveRegGens[I->getReg()] == NULL || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); @@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { else { AvailableQueue->push(SU); } - AvailableQueue->UnscheduledNode(SU); + AvailableQueue->unscheduledNode(SU); } /// After backtracking, the hazard checker needs to be restored to a state @@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return NULL; + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { LoadNode->setNodeId(LoadSU->NodeNum); InitNumRegDefsLeft(LoadSU); - ComputeLatency(LoadSU); + computeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); @@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); - ComputeLatency(NewSU); + computeLatency(NewSU); // Record all the edges to and from the old SU, by category. 
SmallVector ChainPreds; @@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet &RegAdded, SmallVector &LRegs, const TargetRegisterInfo *TRI) { - for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, } } +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector &LiveRegDefs, + SmallSet &RegAdded, + SmallVector &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource. + for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. 
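CheckForLiveRegDefMasked() above consults the register mask a call carries: each physical register gets one bit, a clear bit means the call clobbers that register, and any currently-live register the mask clobbers is reported so the node gets delayed. A standalone sketch of that test (plain C++; the 32-bit-word bit packing mirrors what the code above assumes, but clobbersPhysReg and liveRegsClobbered here are illustrative models rather than the MachineOperand API, and the real loop additionally skips the call-sequence pseudo slot):

    #include <cstdint>
    #include <vector>

    static bool clobbersPhysReg(const uint32_t *RegMask, unsigned Reg) {
      // A set bit means "preserved across the call"; a clear bit means clobbered.
      return !(RegMask[Reg / 32] & (1u << (Reg % 32)));
    }

    // Collect every currently-live register (Reg0 skipped, as in the code above)
    // that the mask clobbers.
    static std::vector<unsigned>
    liveRegsClobbered(const std::vector<bool> &LiveRegDefs, const uint32_t *RegMask) {
      std::vector<unsigned> LRegs;
      for (unsigned i = 1, e = LiveRegDefs.size(); i != e; ++i)
        if (LiveRegDefs[i] && clobbersPhysReg(RegMask, i))
          LRegs.push_back(i);
      return LRegs;
    }

    int main() {
      std::vector<bool> Live(32, false);
      Live[3] = Live[7] = true;       // two live physregs
      uint32_t Mask[1] = {1u << 7};   // only reg 7 is preserved by the call
      std::vector<unsigned> LRegs = liveRegsClobbered(Live, Mask);
      // LRegs == {3}: reg 3 is live and clobbered, so the call must be delayed.
      return static_cast<int>(LRegs.size()) - 1;
    }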
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + LRegs.push_back(CallResource); + } + } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(isBottomUp); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } -//===----------------------------------------------------------------------===// -// Top-Down Scheduling -//===----------------------------------------------------------------------===// - -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the AvailableQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { - SUnit *SuccSU = SuccEdge->getSUnit(); - -#ifndef NDEBUG - if (SuccSU->NumPredsLeft == 0) { - dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); - dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); - } -#endif - --SuccSU->NumPredsLeft; - - // If all the node's predecessors are scheduled, this node is ready - // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { - SuccSU->isAvailable = true; - AvailableQueue->push(SuccSU); - } -} - -void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { - // Top down: release successors - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && - "The list-tdrr scheduler doesn't yet support physreg dependencies!"); - - ReleaseSucc(SU, &*I); - } -} - -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending -/// count of its successors. If a successor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); - - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); - Sequence.push_back(SU); - - ReleaseSuccessors(SU); - SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); -} - -/// ListScheduleTopDown - The main loop of list scheduling for top-down -/// schedulers. -void ScheduleDAGRRList::ListScheduleTopDown() { - AvailableQueue->setCurCycle(CurCycle); - - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); - - // All leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; - } - } - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. 
- Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { - SUnit *CurSU = AvailableQueue->pop(); - - if (CurSU) - ScheduleNodeTopDown(CurSU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); - } - -#ifndef NDEBUG - VerifySchedule(isBottomUp); -#endif -} - - //===----------------------------------------------------------------------===// // RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// @@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort { bool operator()(SUnit* left, SUnit* right) const; }; -// td_ls_rr_sort - Priority function for top down register pressure reduction -// scheduler. -struct td_ls_rr_sort : public queue_sort { - enum { - IsBottomUp = false, - HasReadyFilter = false - }; - - RegReductionPQBase *SPQ; - td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; -}; - // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public queue_sort { enum { @@ -1510,6 +1587,7 @@ class RegReductionPQBase : public SchedulingPriorityQueue { std::vector Queue; unsigned CurQueueId; bool TracksRegPressure; + bool SrcOrder; // SUnits - The SUnits for the current graph. std::vector *SUnits; @@ -1535,11 +1613,12 @@ class RegReductionPQBase : public SchedulingPriorityQueue { RegReductionPQBase(MachineFunction &mf, bool hasReadyFilter, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1610,9 +1689,9 @@ class RegReductionPQBase : public SchedulingPriorityQueue { int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void ScheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU); - void UnscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU); protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase { public: RegReductionPriorityQueue(MachineFunction &mf, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), Picker(this) {} bool isBottomUp() const { return SF::IsBottomUp; } @@ -1680,10 +1761,7 @@ class RegReductionPriorityQueue : public RegReductionPQBase { SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); - if (isBottomUp()) - dbgs() << "Height " << SU->getHeight() << ": "; - else - dbgs() << "Depth " << SU->getDepth() << ": "; + dbgs() << "Height " << SU->getHeight() << ": "; SU->dump(DAG); } } @@ -1692,9 +1770,6 @@ class RegReductionPriorityQueue : public RegReductionPQBase { typedef RegReductionPriorityQueue BURegReductionPriorityQueue; -typedef RegReductionPriorityQueue -TDRegReductionPriorityQueue; - typedef RegReductionPriorityQueue SrcRegReductionPriorityQueue; @@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { return PDiff; } -void 
RegReductionPQBase::ScheduledNode(SUnit *SU) { +void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -1988,7 +2063,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { dumpRegPressure(); } -void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LHeight = (int)left->getHeight() + LPenalty; int RHeight = (int)right->getHeight() + RPenalty; - bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && BUHasStall(left, LHeight, SPQ); - bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && BUHasStall(right, RHeight, SPQ); // If scheduling one of the node will cause a pipeline stall, delay it. // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) { - DEBUG(++FactorCount[FactStall]); + if (!RStall) return 1; - } - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactStall]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } - } else if (RStall) { - DEBUG(++FactorCount[FactStall]); + } else if (RStall) return -1; - } // If either node is scheduling for latency, sort them by height/depth // and latency. - if (!checkPref || (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency)) { + if (!checkPref || (left->SchedulingPref == Sched::ILP || + right->SchedulingPref == Sched::ILP)) { if (DisableSchedCycles) { - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactHeight]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } } else { // If neither instruction stalls (!LStall && !RStall) then @@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) { - DEBUG(++FactorCount[FactOther]); + if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; - } } return 0; } @@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool LHasPhysReg = left->hasPhysRegDefs; bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { - DEBUG(++FactorCount[FactRegUses]); #ifndef NDEBUG const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; #endif @@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; } - if (LPriority != RPriority) { - DEBUG(++FactorCount[FactStatic]); + if (LPriority != RPriority) return LPriority > RPriority; - } // One or both of the nodes are calls and their sethi-ullman numbers are the // same, then keep source order. @@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. 
unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) { - DEBUG(++FactorCount[FactOther]); + if (LDist != RDist) return LDist < RDist; - } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) { - DEBUG(++FactorCount[FactOther]); + if (LScratch != RScratch) return LScratch > RScratch; - } // Comparing latency against a call makes little sense unless the node // is register pressure-neutral. @@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { return result > 0; } else { - if (left->getHeight() != right->getHeight()) { - DEBUG(++FactorCount[FactHeight]); + if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); - } - if (left->getDepth() != right->getDepth()) { - DEBUG(++FactorCount[FactDepth]); + if (left->getDepth() != right->getDepth()) return left->getDepth() < right->getDepth(); - } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); - DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } @@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; @@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); return LPDiff > RPDiff; @@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { bool LReduce = canEnableCoalescing(left); bool RReduce = canEnableCoalescing(right); - DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); if (LReduce && !RReduce) return false; if (RReduce && !LReduce) return true; } @@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); - DEBUG(++FactorCount[FactRegUses]); return LLiveUses < RLiveUses; } if (!DisableSchedStalls) { bool LStall = BUHasStall(left, left->getHeight(), SPQ); bool RStall = BUHasStall(right, right->getHeight(), SPQ); - if (LStall != RStall) { - DEBUG(++FactorCount[FactHeight]); + if (LStall != RStall) return left->getHeight() > right->getHeight(); - } } if (!DisableSchedCriticalPath) { @@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " << left->getDepth() << " != SU(" << right->NodeNum << "): " << right->getDepth() << "\n"); - DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); } } if (!DisableSchedHeight && 
left->getHeight() != right->getHeight()) { int spread = (int)left->getHeight() - (int)right->getHeight(); - if (std::abs(spread) > MaxReorderWindow) { - DEBUG(++FactorCount[FactHeight]); + if (std::abs(spread) > MaxReorderWindow) return left->getHeight() > right->getHeight(); - } } return BURRSort(left, right, SPQ); @@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) + if (!TracksRegPressure && !SrcOrder) PrescheduleNodesWithMultipleUses(); // Calculate node priorities. CalculateSethiUllmanNumbers(); @@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const unsigned *ImpDefs + const uint16_t *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); - if(!ImpDefs) + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) return false; for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); @@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if (!PI->isAssignedRegDep()) continue; - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries a - // topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) - return true; - } + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
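// The RegMask path above treats a call's register mask as a bit vector over
// physical register numbers: a set bit means the register is preserved
// across the call, a cleared bit means it is clobbered. A standalone sketch
// of that membership test (hypothetical helper mirroring the idea behind
// MachineOperand::clobbersPhysReg):
#include <cstdint>

static bool maskClobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  // Bit PhysReg lives in 32-bit word PhysReg / 32 at position PhysReg % 32;
  // the register is clobbered when that bit is not set.
  return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
}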
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; } } return false; @@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const unsigned *SUImpDefs = + const uint16_t *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); - if (!SUImpDefs) - return false; + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) @@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!N->hasAnyUseOfValue(i)) continue; unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; for (;*SUImpDefs; ++SUImpDefs) { unsigned SUReg = *SUImpDefs; if (TRI->regsOverlap(Reg, SUReg)) @@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { } } -/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled -/// predecessors of the successors of the SUnit SU. Stop when the provided -/// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, - unsigned Limit) { - unsigned Sum = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - const SUnit *SuccSU = I->getSUnit(); - for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), - EE = SuccSU->Preds.end(); II != EE; ++II) { - SUnit *PredSU = II->getSUnit(); - if (!PredSU->isScheduled) - if (++Sum > Limit) - return Sum; - } - } - return Sum; -} - - -// Top down -bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - if (int res = checkSpecialNodes(left, right)) - return res < 0; - - unsigned LPriority = SPQ->getNodePriority(left); - unsigned RPriority = SPQ->getNodePriority(right); - bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); - bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); - bool LIsFloater = LIsTarget && left->NumPreds == 0; - bool RIsFloater = RIsTarget && right->NumPreds == 0; - unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; - unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; - - if (left->NumSuccs == 0 && right->NumSuccs != 0) - return false; - else if (left->NumSuccs != 0 && right->NumSuccs == 0) - return true; - - if (LIsFloater) - LBonus -= 2; - if (RIsFloater) - RBonus -= 2; - if (left->NumSuccs == 1) - LBonus += 2; - if (right->NumSuccs == 1) - RBonus += 2; - - if (LPriority+LBonus != RPriority+RBonus) - return LPriority+LBonus < RPriority+RBonus; - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); - - if (left->NumSuccsLeft != right->NumSuccsLeft) - return left->NumSuccsLeft > right->NumSuccsLeft; - - assert(left->NodeQueueId && right->NodeQueueId && - "NodeQueueId cannot be zero"); - return (left->NodeQueueId > right->NodeQueueId); -} - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); - PQ->setScheduleDAG(SD); - return SD; -} - -llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - TDRegReductionPriorityQueue *PQ = - new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); @@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = - new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 71f07d6fa47a..69dd813b24e0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,8 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include 
"llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,20 +46,26 @@ static cl::opt HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), + : ScheduleDAG(mf), BB(0), DAG(0), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// -void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; DAG = dag; - ScheduleDAG::Run(bb, insertPos); + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); } /// NewSUnit - Creates a new SUnit and return a ptr to it. /// -SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG const SUnit *Addr = 0; if (!SUnits.empty()) @@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { } SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { - SUnit *SU = NewSUnit(Old->getNode()); + SUnit *SU = newSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; @@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - SUnit *NodeSUnit = NewSUnit(NI); + SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue @@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + computeLatency(NodeSUnit); } // Find all call operands. @@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget(); // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); + bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpN, N, i, const_cast(dep)); + computeOperandLatency(OpN, N, i, const_cast(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast(dep)); } @@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } } -void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { SDNode *N = SU->getNode(); // TokenFactor operands are considered zero latency, and some schedulers @@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) { + if (forceUnitLatencies()) { SU->Latency = 1; return; } @@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency += TII->getInstrLatency(InstrItins, N); } -void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const{ // Check to see if the scheduler cares about latencies. 
- if (ForceUnitLatencies()) + if (forceUnitLatencies()) return; if (dep.getKind() != SDep::Data) @@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + namespace { struct OrderSorter { bool operator()(const std::pair &A, @@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} -/// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap VRBaseMap; DenseMap CopyVRBaseMap; @@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. - EmitNoop(); + TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } @@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. 
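// VerifyScheduledSequence above has to account for the Sequence vector
// using null SUnit pointers as placeholders for noop instructions. A
// standalone sketch of that bookkeeping with a stub node type:
#include <cassert>
#include <cstddef>
#include <vector>

struct StubSU {};

static void checkSequence(const std::vector<StubSU *> &Sequence,
                          std::size_t ScheduledNodes) {
  std::size_t Noops = 0;
  for (std::size_t i = 0; i != Sequence.size(); ++i)
    if (!Sequence[i])
      ++Noops; // null entries are emitted as target noops later on
  assert(Sequence.size() - Noops == ScheduledNodes &&
         "scheduled node count does not match the emitted sequence");
}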
- EmitPhysRegCopy(SU, CopyVRBaseMap); + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } @@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? + SmallVector DbgMIs; while (DI != DE) { - MachineBasicBlock *InsertBB = Emitter.getBlock(); - MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); - if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); - if (DbgMI) - InsertBB->insert(Pos, DbgMI); - } + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); ++DI; } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } - BB = Emitter.getBlock(); InsertPos = Emitter.getInsertPos(); - return BB; + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 9c27b2ea02ec..75940ec33ddc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -35,17 +35,20 @@ namespace llvm { /// class ScheduleDAGSDNodes : public ScheduleDAG { public: + MachineBasicBlock *BB; SelectionDAG *DAG; // DAG of the current basic block const InstrItineraryData *InstrItins; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector Sequence; + explicit ScheduleDAGSDNodes(MachineFunction &mf); virtual ~ScheduleDAGSDNodes() {} /// Run - perform scheduling. /// - void Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos); + void Run(SelectionDAG *dag, MachineBasicBlock *bb); /// isPassiveNode - Return true if the node is a non-scheduled leaf. /// @@ -53,6 +56,7 @@ namespace llvm { if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; + if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; @@ -67,7 +71,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and return a ptr to it. /// - SUnit *NewSUnit(SDNode *N); + SUnit *newSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. @@ -78,7 +82,7 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AliasAnalysis *AA); /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is /// CopyToReg and its only active data operands are CopyFromReg within a @@ -90,30 +94,41 @@ namespace llvm { /// void InitNumRegDefsLeft(SUnit *SU); - /// ComputeLatency - Compute node latency. + /// computeLatency - Compute node latency. 
/// - virtual void ComputeLatency(SUnit *SU); + virtual void computeLatency(SUnit *SU); - /// ComputeOperandLatency - Override dependence edge latency using + /// computeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { } - virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; - virtual MachineBasicBlock *EmitSchedule(); - /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual void dumpNode(const SUnit *SU) const; + void dumpSchedule() const; + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + virtual std::string getDAGName() const; + virtual void getCustomGraphFeatures(GraphWriter &GW) const; /// RegDefIter - In place iteration over the values defined by an @@ -159,6 +174,9 @@ namespace llvm { /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, + MachineBasicBlock::iterator InsertPos); }; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp similarity index 77% rename from contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp rename to contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 430283d5eff9..c8512914c1e2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -1,4 +1,4 @@ -//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -31,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" #include using namespace llvm; @@ -38,15 +39,15 @@ STATISTIC(NumNoops , "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler - tdListDAGScheduler("list-td", "Top-down list scheduler", - createTDListDAGScheduler); + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); namespace { //===----------------------------------------------------------------------===// -/// ScheduleDAGList - The actual list scheduler implementation. This supports -/// top-down scheduling. +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports / top-down scheduling. /// -class ScheduleDAGList : public ScheduleDAGSDNodes { +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { private: /// AvailableQueue - The priority queue to use for the available SUnits. /// @@ -61,16 +62,20 @@ class ScheduleDAGList : public ScheduleDAGSDNodes { /// HazardRec - The hazard recognizer to use. 
ScheduleHazardRecognizer *HazardRec; + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + public: - ScheduleDAGList(MachineFunction &mf, + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); } - ~ScheduleDAGList() { + ~ScheduleDAGVLIW() { delete HazardRec; delete AvailableQueue; } @@ -78,23 +83,25 @@ class ScheduleDAGList : public ScheduleDAGSDNodes { void Schedule(); private: - void ReleaseSucc(SUnit *SU, const SDep &D); - void ReleaseSuccessors(SUnit *SU); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); - void ListScheduleTopDown(); + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); }; } // end anonymous namespace /// Schedule - Schedule the DAG using list scheduling. -void ScheduleDAGList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(AA); AvailableQueue->initNodes(SUnits); - ListScheduleTopDown(); + listScheduleTopDown(); AvailableQueue->releaseState(); } @@ -103,9 +110,9 @@ void ScheduleDAGList::Schedule() { // Top-Down Scheduling //===----------------------------------------------------------------------===// -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); #ifndef NDEBUG @@ -122,25 +129,26 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { PendingQueue.push_back(SuccSU); + } } -void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { // Top down: release successors. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { assert(!I->isAssignedRegDep() && "The list-td scheduler doesn't yet support physreg dependencies!"); - ReleaseSucc(SU, *I); + releaseSucc(SU, *I); } } -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
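// releaseSucc/releaseSuccessors above implement the standard top-down
// list-scheduling step: scheduling a node decrements the
// unscheduled-predecessor count of each successor and moves any successor
// whose count reaches zero onto the pending queue. Standalone sketch with a
// stub node type:
#include <cassert>
#include <cstddef>
#include <vector>

struct TDNode {
  unsigned NumPredsLeft;
  std::vector<TDNode *> Succs;
};

static void releaseSuccs(TDNode *N, std::vector<TDNode *> &Pending) {
  for (std::size_t i = 0; i != N->Succs.size(); ++i) {
    TDNode *Succ = N->Succs[i];
    assert(Succ->NumPredsLeft > 0 && "NumPredsLeft will underflow");
    if (--Succ->NumPredsLeft == 0)
      Pending.push_back(Succ); // ready: all predecessors have been scheduled
  }
}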
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -148,20 +156,20 @@ void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); - ReleaseSuccessors(SU); + releaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); } -/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// listScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void ScheduleDAGList::ListScheduleTopDown() { +void ScheduleDAGVLIW::listScheduleTopDown() { unsigned CurCycle = 0; // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); + releaseSuccessors(&EntrySU); - // All leaves to Available queue. + // All leaves to AvailableQueue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. if (SUnits[i].Preds.empty()) { @@ -170,7 +178,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } } - // While Available queue is not empty, grab the node with the highest + // While AvailableQueue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector NotReady; Sequence.reserve(SUnits.size()); @@ -184,7 +192,8 @@ void ScheduleDAGList::ListScheduleTopDown() { PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else { + } + else { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } @@ -192,6 +201,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->scheduledNode(0); ++CurCycle; continue; } @@ -223,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If we found a node to schedule, do it now. if (FoundSUnit) { - ScheduleNodeTopDown(FoundSUnit, CurCycle); + scheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); // If this is a pseudo-op node, we don't want to increment the current @@ -250,7 +261,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); + VerifyScheduledSequence(/*isBottomUp=*/false); #endif } @@ -258,8 +269,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler. +/// createVLIWDAGScheduler - This creates a top-down list scheduler. 
ScheduleDAGSDNodes * -llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 20bea8e4c9e9..92671d1678c6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return &APFloat::IEEEhalf; case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { if (i == e) return false; // Do not accept build_vectors that aren't all constants or which have non-~0 - // elements. + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (isa(NotZero)) { - if (!cast(NotZero)->isAllOnesValue()) + if (cast(NotZero)->getAPIntValue().countTrailingOnes() < + EltSize) return false; } else if (isa(NotZero)) { - if (!cast(NotZero)->getValueAPF(). - bitcastToAPInt().isAllOnesValue()) + if (cast(NotZero)->getValueAPF() + .bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; // Okay, we have at least one ~0 value, check to see if the rest match or are - // undefs. + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. 
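// The isBuildVectorAllOnes hunk above only requires the low EltSize bits of
// each element constant to be ones, because type legalization may have
// promoted the constants to a wider legal integer type. Standalone sketch
// of that relaxed test on a plain 64-bit value:
#include <cstdint>

static bool lowBitsAreAllOnes(uint64_t Val, unsigned EltSize) {
  // Count trailing one bits; the element counts as "all ones" when at least
  // EltSize of them are set (mirrors countTrailingOnes() >= EltSize).
  unsigned Ones = 0;
  while (Ones < 64 && (Val & (uint64_t(1) << Ones)))
    ++Ones;
  return Ones >= EltSize;
}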
for (++i; i != e; ++i) if (N->getOperand(i) != NotZero && N->getOperand(i).getOpcode() != ISD::UNDEF) @@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast(N)->getReg()); break; - + case ISD::RegisterMask: + ID.AddPointer(cast(N)->getRegMask()); + break; case ISD::SRCVALUE: ID.AddPointer(cast(N)->getValue()); break; @@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { /// static inline unsigned encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal) { + bool isNonTemporal, bool isInvariant) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && @@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, return ConvType | (AM << 2) | (isVolatile << 5) | - (isNonTemporal << 6); + (isNonTemporal << 6) | + (isInvariant << 7); } //===----------------------------------------------------------------------===// @@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes, void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ SmallVector DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + RemoveDeadNodes(DeadNodes, UpdateListener); } @@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm) +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); @@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); - } else { - assert(0 && "Unsupported type in getConstantFP"); - return SDValue(); - } + } else + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, @@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; @@ -1598,7 +1628,7 @@ bool 
SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. -void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) const { - unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && - "Mask size mismatches value type size!"); +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. - if (Depth == 6 || Mask == 0) + if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; @@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast(Op)->getAPIntValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = cast(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero |= KnownZero2; return; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownOne |= KnownOne2; return; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::MUL: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - 
ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; return; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero &= KnownZero2; return; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShAmt; KnownOne <<= ShAmt; @@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) 
& Mask; + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } return; @@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - APInt InDemandedMask = (Mask << ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; - if (HighBits.getBoolValue()) - InDemandedMask |= APInt::getSignBit(BitWidth); + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); APInt InSignBit = APInt::getSignBit(EBits); - APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); @@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::LOAD: { + LoadSDNode *LD = cast(Op); if (ISD::isZEXTLoad(Op.getNode())) { - LoadSDNode *LD = cast(Op); EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); } return; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -1883,17 +1901,11 @@ void 
SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); - - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. Temporarily set this bit in the mask for our callee. - if (NewBits.getBoolValue()) - InMask |= InSignBit; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); @@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, assert(!(SignBitKnownZero && SignBitKnownOne) && "Sign bit can't be known to be both zero and one!"); - // If the sign bit wasn't actually demanded by our caller, we don't - // want it set in the KnownZero and KnownOne result values. Reset the - // mask and reapply it to the result values. - InMask = Mask.trunc(InBits); - KnownZero &= InMask; - KnownOne &= InMask; - KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.trunc(InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); return; @@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.zext(InBits); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); @@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::AssertZext: { EVT VT = cast(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); return; } case ISD::FGETSIGN: @@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // 
output top bits are zero, because we now know that the output is @@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } @@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); @@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (RA.isPowerOf2()) { APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // the upper bits are all one. if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } @@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); break; } @@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. 
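// The ComputeMaskedBits hunks above drop the demanded-bits Mask parameter:
// each operand is now analysed in full, while the per-opcode combination
// rules stay the same. Standalone sketch of two of those rules (AND and OR)
// on plain 64-bit known-zero/known-one sets:
#include <cstdint>

struct Known64 {
  uint64_t Zero; // bits proven to be 0
  uint64_t One;  // bits proven to be 1
};

static Known64 knownForAnd(Known64 L, Known64 R) {
  // A result bit is 0 if either input bit is 0, and 1 only if both are 1.
  Known64 K = { L.Zero | R.Zero, L.One & R.One };
  return K;
}

static Known64 knownForOr(Known64 L, Known64 R) {
  // A result bit is 1 if either input bit is 1, and 0 only if both are 0.
  Known64 K = { L.Zero & R.Zero, L.One | R.One };
  return K;
}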
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, - Depth+1); - ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); return; } case ISD::FrameIndex: @@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); @@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::TRUNCATE: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. @@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. 
APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (NoNaNsFPMath) + if (getTarget().Options.NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. @@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::CTPOP: return getConstant(Val.countPopulation(), VT); case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: return getConstant(Val.countLeadingZeros(), VT); case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), VT); } } @@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) { + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. if (Operand.getNode()->getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else - return Operand.getNode()->getOperand(0); + return Operand.getNode()->getOperand(0); } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BITCAST: // Basic sanity checking. @@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (UnsafeFPMath && OpOpcode == ISD::FSUB) + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::FNEG) // --X -> X @@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { // 0+x --> x if (ConstantFPSDNode *CFP = dyn_cast(N1)) @@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. 
+ (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. @@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath) return N2; break; case ISD::MUL: @@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SELECT: if (N1C) { if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - else - return N3; // select false, X, Y -> Y + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y } if (N2 == N3) return N2; // select C, X, X -> X break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); - break; case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { + const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumBits = VT.getSizeInBits(); - unsigned MSB = NumBits / 8; + unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + uint64_t Val = 0; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; + if (TLI.isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } + return DAG.getConstant(Val, VT); } @@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, /// isMemSrcFromString - Returns true if memcpy source is a string constant. 
/// -static bool isMemSrcFromString(SDValue Src, std::string &Str) { +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; GlobalAddressSDNode *G = NULL; if (Src.getOpcode() == ISD::GlobalAddress) @@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - const GlobalVariable *GV = dyn_cast(G->getGlobal()); - if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) - return true; - - return false; + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } /// FindOptimalMemOpLowering - Determines the optimial series memory ops @@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, MemcpyStrSrc, + IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - std::string Str; + StringRef Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); @@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, @@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, SrcAlign); + false, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - bool NonScalarIntSafe = + bool IsZeroVal = isa(Src) && cast(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 
0 : Align), 0, - NonScalarIntSafe, false, DAG, TLI)) + IsZeroVal, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); @@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); @@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMSET), + /*isTailCall=*/false, + /*doesNotReturn*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); @@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isVolatile, bool isNonTemporal, bool isInvariant, + unsigned Alignment, const MDNode *TBAAInfo, + const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + Flags |= MachineMemOperand::MOInvariant; // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
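The hunks above widen the load-construction path so that invariant-load and range metadata survive into the MachineMemOperand: getLoad() grows isInvariant and Ranges parameters, and MachineMemOperand::MOInvariant is set when the load is marked invariant. A minimal sketch of how a caller is expected to use the widened overload follows; it is not part of the patch itself, and the local names (VT, dl, Chain, Ptr, SV, Offset, isVolatile, and so on) are illustrative, mirroring the visitLoad() changes later in this diff:

    // Sketch only: thread invariant/range metadata from an IR load into the DAG.
    bool isInvariant = I.getMetadata("invariant.load") != 0;
    const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
    SDValue L = DAG.getLoad(VT, dl, Chain, Ptr,
                            MachinePointerInfo(SV, Offset),
                            isVolatile, isNonTemporal, isInvariant,
                            Alignment, TBAAInfo, Ranges);
    // getLoad() records this as MachineMemOperand::MOInvariant (plus the range
    // metadata), so later passes may treat the loaded memory as unchanging and
    // reorder or rematerialize the load.
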
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo); + TBAAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), + MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const MDNode *TBAAInfo, + const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo, Ranges); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, TBAAInfo); } @@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + false, LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, @@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } +/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// the line number information on the merged node since it is not possible to +/// preserve the information that operation is associated with multiple lines. 
+/// This will make the debugger working better at -O0, were there is a higher +/// probability having other instructions associated with that line. +/// +SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { + DebugLoc NLoc = N->getDebugLoc(); + if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) { + N->setDebugLoc(DebugLoc()); + } + return N; +} + /// MorphNodeTo - This *mutates* the specified node to have the specified /// return type, opcode, and operands. /// @@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) - return ON; + return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc()); } if (!RemoveNodeFromCSEMaps(N)) @@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return cast(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + return cast(UpdadeDebugLocOnMergedSDNode(E, DL)); + } } // Allocate a new MachineSDNode. @@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (FromN == getRoot()) + setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. 
+ if (From == getRoot()) + setRoot(To); } namespace { @@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); @@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast(OperandList[Num])->getZExtValue(); } -std::string SDNode::getOperationName(const SelectionDAG *G) const { - switch (getOpcode()) { - default: - if (getOpcode() < ISD::BUILTIN_OP_END) - return "<>"; - if (isMachineOpcode()) { - if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) - if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->get(getMachineOpcode()).getName(); - return "<>"; - } - if (G) { - const TargetLowering &TLI = G->getTargetLoweringInfo(); - const char *Name = TLI.getTargetNodeName(getOpcode()); - if (Name) return Name; - return "<>"; - } - return "<>"; - -#ifndef NDEBUG - case ISD::DELETED_NODE: - return "<>"; -#endif - case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; - case ISD::ATOMIC_FENCE: return "AtomicFence"; - case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; - case ISD::ATOMIC_SWAP: return "AtomicSwap"; - case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; - case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; - case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; - case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; - case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; - case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; - case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; - case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; - case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; - case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; - case ISD::ATOMIC_LOAD: return "AtomicLoad"; - case ISD::ATOMIC_STORE: return "AtomicStore"; - case ISD::PCMARKER: return "PCMarker"; - case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; - case ISD::SRCVALUE: return "SrcValue"; - case ISD::MDNODE_SDNODE: return "MDNode"; - case ISD::EntryToken: return "EntryToken"; - case ISD::TokenFactor: return "TokenFactor"; - case ISD::AssertSext: return "AssertSext"; - case ISD::AssertZext: return "AssertZext"; - - case ISD::BasicBlock: return "BasicBlock"; - case ISD::VALUETYPE: return "ValueType"; - case ISD::Register: return "Register"; - - case ISD::Constant: return "Constant"; - case ISD::ConstantFP: return "ConstantFP"; - case ISD::GlobalAddress: return "GlobalAddress"; - case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; - case ISD::FrameIndex: return "FrameIndex"; - case ISD::JumpTable: return "JumpTable"; - case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; - case ISD::RETURNADDR: return "RETURNADDR"; - case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_TO_ARGS_OFFSET: 
return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; - case ISD::EH_RETURN: return "EH_RETURN"; - case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; - case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; - case ISD::ConstantPool: return "ConstantPool"; - case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::BlockAddress: return "BlockAddress"; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; - unsigned IID = cast(getOperand(OpNo))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) - return TII->getName(IID); - llvm_unreachable("Invalid intrinsic ID"); - } - - case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; - case ISD::TargetConstantFP:return "TargetConstantFP"; - case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; - case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; - case ISD::TargetFrameIndex: return "TargetFrameIndex"; - case ISD::TargetJumpTable: return "TargetJumpTable"; - case ISD::TargetConstantPool: return "TargetConstantPool"; - case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; - case ISD::TargetBlockAddress: return "TargetBlockAddress"; - - case ISD::CopyToReg: return "CopyToReg"; - case ISD::CopyFromReg: return "CopyFromReg"; - case ISD::UNDEF: return "undef"; - case ISD::MERGE_VALUES: return "merge_values"; - case ISD::INLINEASM: return "inlineasm"; - case ISD::EH_LABEL: return "eh_label"; - case ISD::HANDLENODE: return "handlenode"; - - // Unary operators - case ISD::FABS: return "fabs"; - case ISD::FNEG: return "fneg"; - case ISD::FSQRT: return "fsqrt"; - case ISD::FSIN: return "fsin"; - case ISD::FCOS: return "fcos"; - case ISD::FTRUNC: return "ftrunc"; - case ISD::FFLOOR: return "ffloor"; - case ISD::FCEIL: return "fceil"; - case ISD::FRINT: return "frint"; - case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::FEXP: return "fexp"; - case ISD::FEXP2: return "fexp2"; - case ISD::FLOG: return "flog"; - case ISD::FLOG2: return "flog2"; - case ISD::FLOG10: return "flog10"; - - // Binary operators - case ISD::ADD: return "add"; - case ISD::SUB: return "sub"; - case ISD::MUL: return "mul"; - case ISD::MULHU: return "mulhu"; - case ISD::MULHS: return "mulhs"; - case ISD::SDIV: return "sdiv"; - case ISD::UDIV: return "udiv"; - case ISD::SREM: return "srem"; - case ISD::UREM: return "urem"; - case ISD::SMUL_LOHI: return "smul_lohi"; - case ISD::UMUL_LOHI: return "umul_lohi"; - case ISD::SDIVREM: return "sdivrem"; - case ISD::UDIVREM: return "udivrem"; - case ISD::AND: return "and"; - case ISD::OR: return "or"; - case ISD::XOR: return "xor"; - case ISD::SHL: return "shl"; - case ISD::SRA: return "sra"; - case ISD::SRL: return "srl"; - case ISD::ROTL: return "rotl"; - case ISD::ROTR: return "rotr"; - case ISD::FADD: return "fadd"; - case ISD::FSUB: return "fsub"; - case ISD::FMUL: return "fmul"; - case ISD::FDIV: return "fdiv"; - case ISD::FMA: return "fma"; - case ISD::FREM: return "frem"; - case ISD::FCOPYSIGN: return "fcopysign"; - case ISD::FGETSIGN: return "fgetsign"; - case ISD::FPOW: return "fpow"; - - case ISD::FPOWI: return "fpowi"; - case ISD::SETCC: return 
"setcc"; - case ISD::SELECT: return "select"; - case ISD::VSELECT: return "vselect"; - case ISD::SELECT_CC: return "select_cc"; - case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; - case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; - case ISD::CONCAT_VECTORS: return "concat_vectors"; - case ISD::INSERT_SUBVECTOR: return "insert_subvector"; - case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; - case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; - case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; - case ISD::CARRY_FALSE: return "carry_false"; - case ISD::ADDC: return "addc"; - case ISD::ADDE: return "adde"; - case ISD::SADDO: return "saddo"; - case ISD::UADDO: return "uaddo"; - case ISD::SSUBO: return "ssubo"; - case ISD::USUBO: return "usubo"; - case ISD::SMULO: return "smulo"; - case ISD::UMULO: return "umulo"; - case ISD::SUBC: return "subc"; - case ISD::SUBE: return "sube"; - case ISD::SHL_PARTS: return "shl_parts"; - case ISD::SRA_PARTS: return "sra_parts"; - case ISD::SRL_PARTS: return "srl_parts"; - - // Conversion operators. - case ISD::SIGN_EXTEND: return "sign_extend"; - case ISD::ZERO_EXTEND: return "zero_extend"; - case ISD::ANY_EXTEND: return "any_extend"; - case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; - case ISD::TRUNCATE: return "truncate"; - case ISD::FP_ROUND: return "fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; - case ISD::FP_EXTEND: return "fp_extend"; - - case ISD::SINT_TO_FP: return "sint_to_fp"; - case ISD::UINT_TO_FP: return "uint_to_fp"; - case ISD::FP_TO_SINT: return "fp_to_sint"; - case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bitcast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; - - case ISD::CONVERT_RNDSAT: { - switch (cast(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - - // Control flow instructions - case ISD::BR: return "br"; - case ISD::BRIND: return "brind"; - case ISD::BR_JT: return "br_jt"; - case ISD::BRCOND: return "brcond"; - case ISD::BR_CC: return "br_cc"; - case ISD::CALLSEQ_START: return "callseq_start"; - case ISD::CALLSEQ_END: return "callseq_end"; - - // Other operators - case ISD::LOAD: return "load"; - case ISD::STORE: return "store"; - case ISD::VAARG: return "vaarg"; - case ISD::VACOPY: return "vacopy"; - case ISD::VAEND: return "vaend"; - case ISD::VASTART: return "vastart"; - case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; - case ISD::EXTRACT_ELEMENT: return "extract_element"; - case ISD::BUILD_PAIR: return "build_pair"; - case ISD::STACKSAVE: return "stacksave"; - case ISD::STACKRESTORE: return "stackrestore"; - case ISD::TRAP: return "trap"; - - // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTLZ: return "ctlz"; - - // Trampolines - case ISD::INIT_TRAMPOLINE: return "init_trampoline"; - case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; - - case ISD::CONDCODE: - switch (cast(this)->get()) { - default: llvm_unreachable("Unknown setcc condition!"); - case ISD::SETOEQ: return "setoeq"; - case 
ISD::SETOGT: return "setogt"; - case ISD::SETOGE: return "setoge"; - case ISD::SETOLT: return "setolt"; - case ISD::SETOLE: return "setole"; - case ISD::SETONE: return "setone"; - - case ISD::SETO: return "seto"; - case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; - case ISD::SETUGT: return "setugt"; - case ISD::SETUGE: return "setuge"; - case ISD::SETULT: return "setult"; - case ISD::SETULE: return "setule"; - case ISD::SETUNE: return "setune"; - - case ISD::SETEQ: return "seteq"; - case ISD::SETGT: return "setgt"; - case ISD::SETGE: return "setge"; - case ISD::SETLT: return "setlt"; - case ISD::SETLE: return "setle"; - case ISD::SETNE: return "setne"; - } - } -} - -const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { - switch (AM) { - default: - return ""; - case ISD::PRE_INC: - return ""; - case ISD::PRE_DEC: - return ""; - case ISD::POST_INC: - return ""; - case ISD::POST_DEC: - return ""; - } -} - -std::string ISD::ArgFlagsTy::getArgFlagsString() { - std::string S = "< "; - - if (isZExt()) - S += "zext "; - if (isSExt()) - S += "sext "; - if (isInReg()) - S += "inreg "; - if (isSRet()) - S += "sret "; - if (isByVal()) - S += "byval "; - if (isNest()) - S += "nest "; - if (getByValAlign()) - S += "byval-align:" + utostr(getByValAlign()) + " "; - if (getOrigAlign()) - S += "orig-align:" + utostr(getOrigAlign()) + " "; - if (getByValSize()) - S += "byval-size:" + utostr(getByValSize()) + " "; - return S + ">"; -} - -void SDNode::dump() const { dump(0); } -void SDNode::dump(const SelectionDAG *G) const { - print(dbgs(), G); - dbgs() << '\n'; -} - -void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; - - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ","; - if (getValueType(i) == MVT::Other) - OS << "ch"; - else - OS << getValueType(i).getEVTString(); - } - OS << " = " << getOperationName(G); -} - -void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (const MachineSDNode *MN = dyn_cast(this)) { - if (!MN->memoperands_empty()) { - OS << "<"; - OS << "Mem:"; - for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), - e = MN->memoperands_end(); i != e; ++i) { - OS << **i; - if (llvm::next(i) != e) - OS << " "; - } - OS << ">"; - } - } else if (const ShuffleVectorSDNode *SVN = - dyn_cast(this)) { - OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { - int Idx = SVN->getMaskElt(i); - if (i) OS << ","; - if (Idx < 0) - OS << "u"; - else - OS << Idx; - } - OS << ">"; - } else if (const ConstantSDNode *CSDN = dyn_cast(this)) { - OS << '<' << CSDN->getAPIntValue() << '>'; - } else if (const ConstantFPSDNode *CSDN = dyn_cast(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) - OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) - OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; - else { - OS << "getValueAPF().bitcastToAPInt().dump(); - OS << ")>"; - } - } else if (const GlobalAddressSDNode *GADN = - dyn_cast(this)) { - int64_t offset = GADN->getOffset(); - OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); - OS << '>'; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = GADN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const FrameIndexSDNode *FIDN = dyn_cast(this)) { - OS << "<" << FIDN->getIndex() << ">"; - } else if (const JumpTableSDNode *JTDN = 
dyn_cast(this)) { - OS << "<" << JTDN->getIndex() << ">"; - if (unsigned int TF = JTDN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const ConstantPoolSDNode *CP = dyn_cast(this)){ - int offset = CP->getOffset(); - if (CP->isMachineConstantPoolEntry()) - OS << "<" << *CP->getMachineCPVal() << ">"; - else - OS << "<" << *CP->getConstVal() << ">"; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = CP->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const BasicBlockSDNode *BBDN = dyn_cast(this)) { - OS << "<"; - const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); - if (LBB) - OS << LBB->getName() << " "; - OS << (const void*)BBDN->getBasicBlock() << ">"; - } else if (const RegisterSDNode *R = dyn_cast(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); - } else if (const ExternalSymbolSDNode *ES = - dyn_cast(this)) { - OS << "'" << ES->getSymbol() << "'"; - if (unsigned int TF = ES->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const SrcValueSDNode *M = dyn_cast(this)) { - if (M->getValue()) - OS << "<" << M->getValue() << ">"; - else - OS << ""; - } else if (const MDNodeSDNode *MD = dyn_cast(this)) { - if (MD->getMD()) - OS << "<" << MD->getMD() << ">"; - else - OS << ""; - } else if (const VTSDNode *N = dyn_cast(this)) { - OS << ":" << N->getVT().getEVTString(); - } - else if (const LoadSDNode *LD = dyn_cast(this)) { - OS << "<" << *LD->getMemOperand(); - - bool doExt = true; - switch (LD->getExtensionType()) { - default: doExt = false; break; - case ISD::EXTLOAD: OS << ", anyext"; break; - case ISD::SEXTLOAD: OS << ", sext"; break; - case ISD::ZEXTLOAD: OS << ", zext"; break; - } - if (doExt) - OS << " from " << LD->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(LD->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const StoreSDNode *ST = dyn_cast(this)) { - OS << "<" << *ST->getMemOperand(); - - if (ST->isTruncatingStore()) - OS << ", trunc to " << ST->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(ST->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const MemSDNode* M = dyn_cast(this)) { - OS << "<" << *M->getMemOperand() << ">"; - } else if (const BlockAddressSDNode *BA = - dyn_cast(this)) { - OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); - OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); - OS << ">"; - if (unsigned int TF = BA->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } - - if (G) - if (unsigned Order = G->GetOrdering(this)) - OS << " [ORD=" << Order << ']'; - - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; - - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. 
- if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << ""; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } -} - -void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; - } - print_details(OS, G); -} - -static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, - const SelectionDAG *G, unsigned depth, - unsigned indent) { - if (depth == 0) - return; - - OS.indent(indent); - - N->print(OS, G); - - if (depth < 1) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - // Don't follow chain operands. - if (N->getOperand(i).getValueType() == MVT::Other) - continue; - OS << '\n'; - printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); - } -} - -void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, - unsigned depth) const { - printrWithDepthHelper(OS, this, G, depth, 0); -} - -void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { - // Don't print impossibly deep things. - printrWithDepth(OS, G, 10); -} - -void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { - printrWithDepth(dbgs(), G, depth); -} - -void SDNode::dumprFull(const SelectionDAG *G) const { - // Don't print impossibly deep things. - dumprWithDepth(G, 10); -} - -static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getNode()->hasOneUse()) - DumpNodes(N->getOperand(i).getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": "; - - - dbgs() << "\n"; - dbgs().indent(indent); - N->dump(G); -} - SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - // If GV has specified alignment, then use it. Otherwise, use the preferred - // alignment. - unsigned Align = GV->getAlignment(); - if (!Align) { - if (const GlobalVariable *GVar = dyn_cast(GV)) { - if (GVar->hasInitializer()) { - const TargetData *TD = TLI.getTargetData(); - Align = TD->getPreferredAlignment(GVar); - } - } - if (!Align) - Align = TLI.getTargetData()->getABITypeAlignment(GV->getType()); - } - return MinAlign(Align, GVOffset); + unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::ComputeMaskedBits(const_cast(GV), KnownZero, KnownOne, + TLI.getTargetData()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } -void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; - - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) - DumpNodes(N, 2, this); - } - - if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - - dbgs() << "\n\n"; -} - -void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - print_details(OS, G); -} - -typedef SmallPtrSet VisitedSDNodeSet; -static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, - const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. - return; - - // Dump the current SDNode, but don't end the line yet. - OS << std::string(indent, ' '); - N->printr(OS, G); - - // Having printed this SDNode, walk the children: - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - - if (i) OS << ","; - OS << " "; - - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. - OS << (void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } - } - - OS << "\n"; - - // Dump children that have grandchildren on their own line(s). - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - DumpNodesr(OS, child, indent+2, G, once); - } -} - -void SDNode::dumpr() const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); -} - -void SDNode::dumpr(const SelectionDAG *G) const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, G, once); -} - - // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 095b4001696f..94cb95804f69 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -41,13 +41,13 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getIntPtrConstant(1)); + DAG.getTargetConstant(1, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); - return SDValue(); } /// getCopyFromParts - Create a value that contains the specified legal parts @@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. @@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; + LibInfo = li; TD = DAG.getTarget().getTargetData(); LPadToCallSiteMap.clear(); } @@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast(C)) { + SmallVector Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa(C) || isa(C)) && @@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector Ops; - if (const ConstantVector *CP = dyn_cast(C)) { + if (const ConstantVector *CV = dyn_cast(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); @@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } llvm_unreachable("Can't get register for value!"); - return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { @@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; @@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, Condition = getICmpCondCode(IC->getPredicate()); } else if (const FCmpInst *FC = dyn_cast(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); } else { Condition = ISD::SETEQ; // silence warning. llvm_unreachable("Unknown compare instruction"); @@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + addSuccessorWithWeight(InvokeMBB, Return); + addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), @@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} - void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } @@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. 
+ if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) + return; + SmallVector ValueVTs; ComputeValueVTs(TLI, LP.getType(), ValueVTs); @@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !DisableJumpTables && + return !TLI.getTargetMachine().Options.DisableJumpTables && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } @@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; + const Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is @@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - BasicBlock *SuccBB = SI.getSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumCases() == 1) { + if (!SI.getNumCases()) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } @@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I){ - // FPTrunc is never a no-op cast, no need to check + // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); @@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { TLI.getValueType(I.getType()), InVec, InIdx)); } -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). 
-static bool SequentialMask(SmallVectorImpl &Mask, unsigned SIndx) { - unsigned MaskNumElts = Mask.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) - if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) +// Utility for visitShuffleVector - Return true if every element in Mask, +// begining from position Pos and ending in Pos+Size, falls within the +// specified sequential range [L, L+Pos). or is undef. +static bool isSequentialInRange(const SmallVectorImpl &Mask, + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; } void SelectionDAGBuilder::visitShuffleVector(const User &I) { - SmallVector Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. - SmallVector MaskElts; - cast(I.getOperand(2))->getVectorElements(MaskElts); - unsigned MaskNumElts = MaskElts.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (isa(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast(MaskElts[i])->getSExtValue()); - } - + SmallVector Mask; + ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); + unsigned MaskNumElts = Mask.size(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. - if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2)); - return; + if (SrcNumElts*2 == MaskNumElts) { + // First check for Src1 in low and Src2 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + // Then check for Src2 in low and Src1 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src2, Src1)); + return; + } } // Pad both vectors with undefs to make them the same length as the mask. @@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - MaskNumElts; + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. 
- int MinRange[2] = { static_cast(SrcNumElts+1), - static_cast(SrcNumElts+1)}; + int MinRange[2] = { static_cast(SrcNumElts), + static_cast(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - int Input = 0; + unsigned Input = 0; if (Idx < 0) continue; @@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Check if the access is smaller than the vector size and can we find // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } + continue; } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { + for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); @@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT PtrVT = TLI.getPointerTy(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); + int Idx = Mask[i]; + SDValue Res; + + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); } else { - int Idx = Mask[i]; - SDValue Res; + SDValue &Src = Idx < (int)SrcNumElts ? 
Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); - - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), @@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - Type *Ty = I.getOperand(0)->getType(); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. + Type *Ty = I.getOperand(0)->getType()->getScalarType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), N.getValueType(), IdxN, - DAG.getConstant(Amt, TLI.getPointerTy())); + DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), @@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector ValueVTs; SmallVector Offsets; @@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { - default: llvm_unreachable("Unknown atomicrmw operation"); return; + default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; @@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. 
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } setValue(&I, Result); + } else { + // Assign order to result here. If the intrinsic does not produce a result, + // it won't be mapped to a SDNode and visit() will not assign it an order + // number. + ++SDNodeOrder; + AssignOrderingToNode(Result.getNode()); } } @@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. -const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void @@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) @@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; + } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder @@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. 
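// A note on the setjmp/longjmp cases above: the new spelling
//   return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
// returns a pointer into the string literal, so index 0 keeps the leading
// '_' and index 1 yields "setjmp".  The old "_setjmp" + !flag computed the
// same pointer; the array-subscript form likely just sidesteps compiler
// warnings about adding an integer to a string literal.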
if (isa(Address) || (Address->use_empty() && !isa(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa(Address)); + const AllocaInst *AI = dyn_cast(Address); if (isParameter && !AI) { @@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); return 0; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); @@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } } - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return 0; @@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast(V); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return 0; + } DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) @@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } - case Intrinsic::eh_exception: { - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[1]; - Ops[0] = DAG.getRoot(); - SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - return 0; - } - - case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CallMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CallMBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. 
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getArgOperand(0)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); - return 0; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. @@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return 0; } - case Intrinsic::eh_sjlj_dispatch_setup: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); - return 0; - } case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: @@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { @@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - return 0; case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; @@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::trap: { - StringRef TrapFuncName = getTrapFunctionName(); + StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; @@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*isReturnValueUsed=*/true, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); @@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If there's a possibility that fast-isel has already selected some amount // of the current basic block, don't emit a tail call. 
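// The cttz/ctlz cases above assume the intrinsics now carry a second i1
// operand ("is zero undef").  A sketch of the IR-side contract this code
// relies on (%x is only an example value):
//
//   %n = call i32 @llvm.cttz.i32(i32 %x, i1 false)  ; defined for %x == 0
//   %m = call i32 @llvm.cttz.i32(i32 %x, i1 true)   ; %x == 0 gives undef
//
// A false flag (CI->isZero()) therefore selects the fully defined
// ISD::CTTZ/CTLZ node, while a true flag selects the *_ZERO_UNDEF form,
// which targets may be able to lower more cheaply.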
- if (isTailCall && EnableFastISel) + if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; std::pair Result = @@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, + CS.doesNotReturn(), !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && @@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, 1); + false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, 1 /* align=1 */); + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - // See if any floating point values are being passed to this function. This is - // used to emit an undefined reference to fltused on Windows. - FunctionType *FT = - cast(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (FT->isVarArg() && - !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (po_iterator i = po_begin(T), e = po_end(T); - i != e; ++i) { - if (!i->isFloatingPointTy()) continue; - MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); - break; - } - } - } + ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // can't be a library call. if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || + (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || + (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { + } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || + (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || + (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType()) { @@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { + } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || + (LibInfo->has(LibFunc::sinf) && Name == "sinf") || + (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { + } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || + (LibInfo->has(LibFunc::cosf) && Name == "cosf") || + (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || + (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || + (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } + } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || + (LibInfo->has(LibFunc::floorf) && Name == "floorf") || + (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || + (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || + (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || + (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || + (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || + (LibInfo->has(LibFunc::rintf) && Name == "rintf") || + (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || + (LibInfo->has(LibFunc::truncf) && Name == "truncf") || + (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || + (LibInfo->has(LibFunc::log2f) && Name == "log2f") || + (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || + (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || + (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } else if (Name == "memcmp") { if (visitMemCmpCall(I)) return; @@ -5596,22 +5690,6 @@ class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } - /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers - /// busy in OutputRegs/InputRegs. - void MarkAllocatedRegs(bool isOutReg, bool isInReg, - std::set &OutputRegs, - std::set &InputRegs, - const TargetRegisterInfo &TRI) const { - if (isOutReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); - } - if (isInReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); - } - } - /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -5659,18 +5737,6 @@ class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { return TLI.getValueType(OpTy, true); } - -private: - /// MarkRegAndAliases - Mark the specified register and all aliases in the - /// specified set. - static void MarkRegAndAliases(unsigned Reg, std::set &Regs, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); - Regs.insert(Reg); - if (const unsigned *Aliases = TRI.getAliasSet(Reg)) - for (; *Aliases; ++Aliases) - Regs.insert(*Aliases); - } }; typedef SmallVector SDISelAsmOperandInfoVector; @@ -5684,39 +5750,13 @@ typedef SmallVector SDISelAsmOperandInfoVector; /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. 
-/// Input and OutputRegs are the set of already allocated physical registers. /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc DL, - SDISelAsmOperandInfo &OpInfo, - std::set &OutputRegs, - std::set &InputRegs) { + SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); - // Compute whether this value requires an input register, an output register, - // or both. - bool isOutReg = false; - bool isInReg = false; - switch (OpInfo.Type) { - case InlineAsm::isOutput: - isOutReg = true; - - // If there is an input constraint that matches this, we need to reserve - // the input register so no other inputs allocate to it. - isInReg = OpInfo.hasMatchingInput(); - break; - case InlineAsm::isInput: - isInReg = true; - isOutReg = false; - break; - case InlineAsm::isClobber: - isOutReg = true; - isInReg = true; - break; - } - - MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; @@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } @@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - std::set OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); @@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa(OpVal) || isa(OpVal) || - isa(OpVal)) { + isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast(OpVal), TLI.getPointerTy()); } else { @@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate output reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // If this is an indirect operand, store through the pointer after the // asm. 
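// The constraint-failure paths above (and the two that follow) switch from
// report_fatal_error() to LLVMContext::emitError(), i.e. from aborting the
// whole compilation to attaching a diagnostic to the offending call and
// continuing.  A minimal sketch of the new pattern, with a made-up
// constraint string:
//
//   LLVMContext &Ctx = *DAG.getContext();
//   Ctx.emitError(CS.getInstruction(),
//                 "couldn't allocate output register for constraint 'r'");
//   break;   // keep selecting the rest of the function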
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); - if (Ops.empty()) - report_fatal_error("Invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = @@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); @@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, + bool doesNotRet, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const { @@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, } SmallVector InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. @@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); - return SDValue(); } void @@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - if (EnableFastISel) + if (FastISel) return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); @@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->getCurDebugLoc()); SDB->setValue(I, Res); - if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = @@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. 
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel @@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { continue; } } - if (!isOnlyUsedInEntryBlock(I)) { + if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0a21ca3472ca..8393b414926a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -67,11 +67,11 @@ class SIToFPInst; class StoreInst; class SwitchInst; class TargetData; +class TargetLibraryInfo; class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; -class UnwindInst; class VAArgInst; class ZExtInst; @@ -129,13 +129,13 @@ class SelectionDAGBuilder { /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. struct Case { - Constant* Low; - Constant* High; + const Constant *Low; + const Constant *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb, + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -294,6 +294,7 @@ class SelectionDAGBuilder { SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; + const TargetLibraryInfo *LibInfo; /// SwitchCases - Vector of CaseBlock structures used to communicate /// SwitchInst code generation information. @@ -338,7 +339,8 @@ class SelectionDAGBuilder { HasTailCall(false), Context(dag.getContext()) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used @@ -451,7 +453,8 @@ class SelectionDAGBuilder { MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, uint32_t Weight = 0); public: @@ -471,7 +474,6 @@ class SelectionDAGBuilder { // These all get lowered before this pass. 
void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); void visitShift(const User &I, unsigned Opcode); @@ -554,8 +556,6 @@ class SelectionDAGBuilder { void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 000000000000..f981afb437b0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,631 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<>"; + } + return "<>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return 
"ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: 
return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + 
case ISD::PRE_INC: return ""; + case ISD::PRE_DEC: return ""; + case ISD::POST_INC: return ""; + case ISD::POST_DEC: return ""; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? 
G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << ""; + } else if (const MDNodeSDNode *MD = dyn_cast(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << ""; + } else if (const VTSDNode *N = dyn_cast(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. + if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << ""; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": "; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. 
+ return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. 
+ dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68b9146adfe1..605509bd227a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +static cl::opt +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); +STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); +STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); +STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); +STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); +STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); +STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); + + // Standard binary operators... +STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add"); +STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd"); +STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub"); +STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub"); +STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul"); +STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul"); +STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv"); +STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv"); +STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv"); +STATISTIC(NumFastIselFailURem,"Fast isel fails on URem"); +STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem"); +STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem"); + + // Logical operators... +STATISTIC(NumFastIselFailAnd,"Fast isel fails on And"); +STATISTIC(NumFastIselFailOr,"Fast isel fails on Or"); +STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor"); + + // Memory instructions... +STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca"); +STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load"); +STATISTIC(NumFastIselFailStore,"Fast isel fails on Store"); +STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg"); +STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM"); +STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence"); +STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr"); + + // Convert instructions... 
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc"); +STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt"); +STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt"); +STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc"); +STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt"); +STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI"); +STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI"); +STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP"); +STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP"); +STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr"); +STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt"); +STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast"); + + // Other instructions... +STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); +#endif + static cl::opt EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -142,14 +217,15 @@ namespace llvm { CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || + TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Latency) - return createTDListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(0); - return 0; } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - assert(!MI->getDesc().hasPostISelHook() && + assert(!MI->hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// +void SelectionDAGISel::ISelUpdater::anchor() { } + SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : 
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm)), + CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); @@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. - assert((!EnableFastISelVerbose || EnableFastISel) && + assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); - assert((!EnableFastISelAbort || EnableFastISel) && + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); @@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF = &mf; RegInfo = &MF->getRegInfo(); AA = &getAnalysis(); + LibInfo = &getAnalysis(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : 0; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else FuncInfo->BPI = 0; - SDB->init(GFI, *AA); + SDB->init(GFI, *AA, LibInfo); SelectAllBasicBlocks(Fn); @@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII.get(TargetOpcode::DBG_VALUE)) .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) .addImm(Offset).addMetadata(Variable); - EntryMBB->insertAfter(CopyUseMI, NewMI); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); } } } @@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. 
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt Mask; APInt KnownZero; APInt KnownOne; @@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); - CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getNameStr() + ":" + - FuncInfo->MBB->getBasicBlock()->getNameStr(); + BlockName = MF->getFunction()->getName().str() + ":" + + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. { NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); - CurDAG->Combine(Unrestricted, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize types", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber @@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber @@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Scheduling", GroupName, TimePassesIsEnabled); - Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs) Scheduler->viewGraph(); @@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); - FuncInfo->InsertPt = Scheduler->InsertPos; + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to @@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. 
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); if (Reg) MBB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const BranchInst *Br = dyn_cast(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::const_iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); - } } /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified @@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } +#ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && " "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... 
+ case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... + case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... + case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; - if (EnableFastISel) + if (TM.Options.EnableFastISel) FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. 
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->setLastLocalValue(0); } + unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; continue; + } // Bottom-up: reset the insert pos at the top, after any local-value // instructions. @@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { + --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. @@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast(BeforeInst), Inst, FastIS)) + TryToFoldFastISelLoad(cast(BeforeInst), Inst, FastIS)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } continue; } +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa(Inst)) { - ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; Inst->dump(); @@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; SelectBasicBlock(Inst, BI, HadTailCall); + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { --BI; break; } + NumFastIselRemaining = RemainingNow; continue; } if (isa(Inst) && !isa(Inst)) { // Don't abort, and use a different message for terminator misses. - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed terminator: "; Inst->dump(); } } else { - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; Inst->dump(); @@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::EntryToken: // These nodes remain the same. 
case ISD::BasicBlock: case ISD::Register: + case ISD::RegisterMask: //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cd1647b17b9b..6cde05aea82a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" using namespace llvm; namespace llvm { @@ -148,7 +146,7 @@ std::string DOTGraphTraits::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 907d8d9da1af..09a2b1f3d7a5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -36,31 +36,9 @@ using namespace llvm; /// - the promotion of vector elements. This feature is disabled by default /// and only enabled using this flag. static cl::opt -AllowPromoteIntElem("promote-elements", cl::Hidden, +AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); -namespace llvm { -TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); - // FIXME: what should we do for protected and internal visibility? - // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); - - if (reloc == Reloc::PIC_) { - if (isLocal || isHidden) - return TLSModel::LocalDynamic; - else - return TLSModel::GeneralDynamic; - } else { - if (!isDeclaration || isHidden) - return TLSModel::LocalExec; - else - return TLSModel::InitialExec; - } -} -} - /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names) { @@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Expand); setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10,MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10,MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::Latency; + SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; JumpBufAlignment = 0; MinFunctionAlignment = 0; @@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const { SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { // If our PIC model is GP relative, use the global offset table as the base. - if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; } @@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. 
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast(Op)->getAPIntValue() & NewMask; - KnownZero = ~KnownOne & NewMask; + KnownOne = cast(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without @@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth); + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getNode()->getOperand(0); EVT InnerVT = InnerOp.getValueType(); - if ((APInt::getHighBitsSet(BitWidth, - BitWidth - InnerVT.getSizeInBits()) & - DemandedMask) == 0 && + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) + if (NewMask == 1) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast(Op.getOperand(1))->getVT(); + EVT ExVT = cast(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. 
if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - KnownZero &= ~NewBits; + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - KnownOne &= ~NewBits; - KnownZero &= ~NewBits; + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); } break; } @@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!Op.getOperand(0).getValueType().isVector() && + if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); @@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // FALL THROUGH default: // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } /// ComputeNumSignBitsForTargetNode - This method can be implemented by @@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. 
EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } @@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, NewAlign); + false, false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (N0.getValueType().isInteger()) { + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + case ZeroOrNegativeOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); + } + } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); @@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // If RHS is a legal immediate value for a compare instruction, we need + // to be careful about increasing register pressure needlessly. + bool LegalRHSImm = false; + if (ConstantSDNode *RHSC = dyn_cast(N1)) { if (ConstantSDNode *LHSR = dyn_cast(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); } // Simplify (X+Z) == X --> Z == 0 - if (N0.getOperand(0) == N1) - return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); - if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); - else if (N0.getNode()->hasOneUse()) { - assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); - // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. 
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } } } } @@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_Memory: return 3; } + llvm_unreachable("Invalid constraint type"); } /// Examine constraint type and operand type and determine a weight value. @@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, - std::vector* Created) const { +SDValue TargetLowering:: +BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); @@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; - if (isOperationLegalOrCustom(ISD::MULHS, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : + isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), DAG.getConstant(magics.m, VT)).getNode(), 1); @@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// -SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, - std::vector* Created) const { +SDValue TargetLowering:: +BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector* Created) const { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); @@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - if (isOperationLegalOrCustom(ISD::MULHU, VT)) + if (IsAfterLegalization ? 
isOperationLegal(ISD::MULHU, VT) : + isOperationLegalOrCustom(ISD::MULHU, VT)) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, VT)).getNode(), 1); else diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp index 2609256c8ffa..0016047a134e 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp @@ -116,8 +116,7 @@ namespace { // Branches and invokes do not escape, only unwind, resume, and return // do. TerminatorInst *TI = CurBB->getTerminator(); - if (!isa(TI) && !isa(TI) && - !isa(TI)) + if (!isa(TI) && !isa(TI)) continue; Builder.SetInsertPoint(TI->getParent(), TI); diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp index 160f38f69236..21ae2f5e56eb 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp @@ -93,6 +93,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -124,7 +125,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { } bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); + return (MBB && !MBB->empty() && MBB->back().isReturn()); } // Initialize shrink wrapping DFA sets, called before iterations. @@ -158,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=. 
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getNameStr(); + std::string MFName = MF->getFunction()->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -1045,7 +1046,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { return ""; if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getNameStr(); + return MBB->getBasicBlock()->getName().str(); std::ostringstream name; name << "_MBB_" << MBB->getNumber(); diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index ded2459d4278..9a86f32d8f96 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -1,4 +1,4 @@ -//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// // // The LLVM Compiler Infrastructure // @@ -29,21 +29,20 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include using namespace llvm; -static cl::opt DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden, - cl::desc("Disable the old SjLj EH preparation pass")); - STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumUnwinds, "Number of unwinds replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPass : public FunctionPass { + class SjLjEHPrepare : public FunctionPass { const TargetLowering *TLI; Type *FunctionContextTy; Constant *RegisterFn; @@ -54,16 +53,12 @@ namespace { Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; - Constant *SelectorFn; - Constant *ExceptionFn; Constant *CallSiteFn; - Constant *DispatchSetupFn; Constant *FuncCtxFn; - Value *CallSite; - DenseMap LPadSuccMap; + AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPass(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLowering *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -75,28 +70,24 @@ namespace { private: bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal); Value *setupFunctionContext(Function &F, ArrayRef LPads); void lowerIncomingArguments(Function &F); void lowerAcrossUnwindEdges(Function &F, ArrayRef Invokes); - - void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); - void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, - SwitchInst *CatchSwitch); - void splitLiveRangesAcrossInvokes(SmallVector &Invokes); - void splitLandingPad(InvokeInst *II); - bool insertSjLjEHSupport(Function &F); + void insertCallSiteStore(Instruction *I, int Number); }; } // end anonymous namespace -char SjLjEHPass::ID = 0; +char SjLjEHPrepare::ID = 0; -// Public Interface To the SjLjEHPass pass. -FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { - return new SjLjEHPass(TLI); +// Public Interface To the SjLjEHPrepare pass. +FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { + return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process // exceptions. 
-bool SjLjEHPass::doInitialization(Module &M) { +bool SjLjEHPrepare::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); @@ -123,11 +114,7 @@ bool SjLjEHPass::doInitialization(Module &M) { StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); - SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); - ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); - DispatchSetupFn - = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); PersonalityFn = 0; @@ -136,583 +123,67 @@ bool SjLjEHPass::doInitialization(Module &M) { /// insertCallSiteStore - Insert a store of the call-site value to the /// function context -void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, - Value *CallSite) { +void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { + IRBuilder<> Builder(I); + + // Get a reference to the call_site field. + Type *Int32Ty = Type::getInt32Ty(I->getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + Value *One = ConstantInt::get(Int32Ty, 1); + Value *Idxs[2] = { Zero, One }; + Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); + + // Insert a store of the call-site number ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), Number); - // Insert a store of the call-site number - new StoreInst(CallSiteNoC, CallSite, true, I); // volatile -} - -/// splitLandingPad - Split a landing pad. This takes considerable care because -/// of PHIs and other nasties. The problem is that the jump table needs to jump -/// to the landing pad block. However, the landing pad block can be jumped to -/// only by an invoke instruction. So we clone the landingpad instruction into -/// its own basic block, have the invoke jump to there. The landingpad -/// instruction's basic block's successor is now the target for the jump table. -/// -/// But because of PHI nodes, we need to create another basic block for the jump -/// table to jump to. This is definitely a hack, because the values for the PHI -/// nodes may not be defined on the edge from the jump table. But that's okay, -/// because the jump table is simply a construct to mimic what is happening in -/// the CFG. So the values are mysteriously there, even though there is no value -/// for the PHI from the jump table's edge (hence calling this a hack). -void SjLjEHPass::splitLandingPad(InvokeInst *II) { - SmallVector NewBBs; - SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(), - ".1", ".2", this, NewBBs); - - // Create an empty block so that the jump table has something to jump to - // which doesn't have any PHI nodes. 
- BasicBlock *LPad = NewBBs[0]; - BasicBlock *Succ = *succ_begin(LPad); - BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land", - LPad->getParent(), Succ); - LPad->getTerminator()->eraseFromParent(); - BranchInst::Create(JumpTo, LPad); - BranchInst::Create(Succ, JumpTo); - LPadSuccMap[II] = JumpTo; - - for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - Value *Val = PN->removeIncomingValue(LPad, false); - PN->addIncoming(Val, JumpTo); - } -} - -/// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, - Value *CallSite, - SwitchInst *CatchSwitch) { - ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); - // The runtime comes back to the dispatcher with the call_site - 1 in - // the context. Odd, but there it is. - ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo - 1); - - // If the unwind edge has phi nodes, split the edge. - if (isa(II->getUnwindDest()->begin())) { - // FIXME: New EH - This if-condition will be always true in the new scheme. - if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - // If there are any phi nodes left, they must have a single predecessor. - while (PHINode *PN = dyn_cast(II->getUnwindDest()->begin())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - PN->eraseFromParent(); - } - } - - // Insert the store of the call site value - insertCallSiteStore(II, InvokeNo, CallSite); - - // Record the call site value for the back end so it stays associated with - // the invoke. - CallInst::Create(CallSiteFn, CallSiteNoC, "", II); - - // Add a switch case to our unwind block. - if (BasicBlock *SuccBB = LPadSuccMap[II]) { - CatchSwitch->addCase(SwitchValC, SuccBB); - } else { - CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); - } - - // We still want this to look like an invoke so we emit the LSDA properly, - // so we don't transform the invoke into a call here. + Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/); } /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've already seen. -static void MarkBlocksLiveIn(BasicBlock *BB, std::set &LiveBBs) { - if (!LiveBBs.insert(BB).second) return; // already been here. +static void MarkBlocksLiveIn(BasicBlock *BB, + SmallPtrSet &LiveBBs) { + if (!LiveBBs.insert(BB)) return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); } -/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge -/// we spill into a stack location, guaranteeing that there is nothing live -/// across the unwind edge. This process also splits all critical edges -/// coming out of invoke's. -/// FIXME: Move this function to a common utility file (Local.cpp?) so -/// both SjLj and LowerInvoke can use it. -void SjLjEHPass:: -splitLiveRangesAcrossInvokes(SmallVector &Invokes) { - // First step, split all critical edges from invoke instructions. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - InvokeInst *II = Invokes[i]; - SplitCriticalEdge(II, 0, this); - - // FIXME: New EH - This if-condition will be always true in the new scheme. 
- if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - assert(!isa(II->getNormalDest()) && - !isa(II->getUnwindDest()) && - "Critical edge splitting left single entry phi nodes?"); +/// substituteLPadValues - Substitute the values returned by the landingpad +/// instruction with those returned by the personality function. +void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { + SmallVector UseWorkList(LPI->use_begin(), LPI->use_end()); + while (!UseWorkList.empty()) { + Value *Val = UseWorkList.pop_back_val(); + ExtractValueInst *EVI = dyn_cast(Val); + if (!EVI) continue; + if (EVI->getNumIndices() != 1) continue; + if (*EVI->idx_begin() == 0) + EVI->replaceAllUsesWith(ExnVal); + else if (*EVI->idx_begin() == 1) + EVI->replaceAllUsesWith(SelVal); + if (EVI->getNumUses() == 0) + EVI->eraseFromParent(); } - Function *F = Invokes.back()->getParent()->getParent(); + if (LPI->getNumUses() == 0) return; - // To avoid having to handle incoming arguments specially, we lower each arg - // to a copy instruction in the entry block. This ensures that the argument - // value itself cannot be live across the entry block. - BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); - while (isa(AfterAllocaInsertPt) && - isa(cast(AfterAllocaInsertPt)->getArraySize())) - ++AfterAllocaInsertPt; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) { - Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa(Ty) || isa(Ty) || isa(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, - // we're replacing it here with the same value it was constructed with. - // We do this because the above replaceAllUsesWith() clobbered the - // operand, but we want this one to remain. - NC->setOperand(0, AI); - } - } + // There are still some uses of LPI. Construct an aggregate with the exception + // values and replace the LPI with that aggregate. + Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + IRBuilder<> + Builder(llvm::next(BasicBlock::iterator(cast(SelVal)))); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); - // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - // Ignore obvious cases we don't have to handle. 
In particular, most - // instructions either have no uses or only have a single use inside the - // current block. Ignore them quickly. - Instruction *Inst = II; - if (Inst->use_empty()) continue; - if (Inst->hasOneUse() && - cast(Inst->use_back())->getParent() == BB && - !isa(Inst->use_back())) continue; - - // If this is an alloca in the entry block, it's not a real register - // value. - if (AllocaInst *AI = dyn_cast(Inst)) - if (isa(AI->getArraySize()) && BB == F->begin()) - continue; - - // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); - if (User->getParent() != BB || isa(User)) - Users.push_back(User); - } - - // Find all of the blocks that this value is live in. - std::set LiveBBs; - LiveBBs.insert(Inst->getParent()); - while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); - - if (!isa(U)) { - MarkBlocksLiveIn(U->getParent(), LiveBBs); - } else { - // Uses for a PHI node occur in their predecessor block. - PHINode *PN = cast(U); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) - MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); - } - } - - // Now that we know all of the blocks that this thing is live in, see if - // it includes any of the unwind locations. - bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) - NeedsSpill = true; - } - - // If we decided we need a spill, do it. - // FIXME: Spilling this way is overkill, as it forces all uses of - // the value to be reloaded from the stack slot, even those that aren't - // in the unwind blocks. We should be more selective. - if (NeedsSpill) { - ++NumSpilled; - DemoteRegToStack(*Inst, true); - } - } -} - -/// CreateLandingPadLoad - Load the exception handling values and insert them -/// into a structure. -static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr, - Value *SelAddr, - BasicBlock::iterator InsertPt) { - Value *Exn = new LoadInst(ExnAddr, "exn", false, - InsertPt); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt); - Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt); - - Ty = StructType::get(Exn->getType(), Sel->getType(), NULL); - InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty), - Exn, 0, - "lpad.val", InsertPt); - return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt); -} - -/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a -/// load from the stored values (via CreateLandingPadLoad). This looks through -/// PHI nodes, and removes them if they are dead. 
-static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr, - Value *SelAddr) { - if (Inst->use_empty()) return; - - while (!Inst->use_empty()) { - Instruction *I = cast(Inst->use_back()); - - if (PHINode *PN = dyn_cast(I)) { - ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr); - if (PN->use_empty()) PN->eraseFromParent(); - continue; - } - - I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I)); - } -} - -bool SjLjEHPass::insertSjLjEHSupport(Function &F) { - SmallVector Returns; - SmallVector Unwinds; - SmallVector Invokes; - - // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { - // Remember all return instructions in case we insert an invoke into this - // function. - Returns.push_back(RI); - } else if (InvokeInst *II = dyn_cast(BB->getTerminator())) { - Invokes.push_back(II); - } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { - Unwinds.push_back(UI); - } - } - - NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); - - // If we don't have any invokes, there's nothing to do. - if (Invokes.empty()) return false; - - // Find the eh.selector.*, eh.exception and alloca calls. - // - // Remember any allocas() that aren't in the entry block, as the - // jmpbuf saved SP will need to be updated for them. - // - // We'll use the first eh.selector to determine the right personality - // function to use. For SJLJ, we always use the same personality for the - // whole function, not on a per-selector basis. - // FIXME: That's a bit ugly. Better way? - SmallVector EH_Selectors; - SmallVector EH_Exceptions; - SmallVector JmpbufUpdatePoints; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - // Note: Skip the entry block since there's nothing there that interests - // us. eh.selector and eh.exception shouldn't ever be there, and we - // want to disregard any allocas that are there. - // - // FIXME: This is awkward. The new EH scheme won't need to skip the entry - // block. - if (BB == F.begin()) { - if (InvokeInst *II = dyn_cast(F.begin()->getTerminator())) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - - continue; - } - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast(I)) { - if (CI->getCalledFunction() == SelectorFn) { - if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1); - EH_Selectors.push_back(CI); - } else if (CI->getCalledFunction() == ExceptionFn) { - EH_Exceptions.push_back(CI); - } else if (CI->getCalledFunction() == StackRestoreFn) { - JmpbufUpdatePoints.push_back(CI); - } - } else if (AllocaInst *AI = dyn_cast(I)) { - JmpbufUpdatePoints.push_back(AI); - } else if (InvokeInst *II = dyn_cast(I)) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - } - } - - // If we don't have any eh.selector calls, we can't determine the personality - // function. Without a personality function, we can't process exceptions. - if (!PersonalityFn) return false; - - // We have invokes, so we need to add register/unregister calls to get this - // function onto the global unwind stack. 
- // - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge we - // spill into a stack location, guaranteeing that there is nothing live across - // the unwind edge. This process also splits all critical edges coming out of - // invoke's. - splitLiveRangesAcrossInvokes(Invokes); - - - SmallVector LandingPads; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (InvokeInst *II = dyn_cast(BB->getTerminator())) - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - LandingPads.push_back(LPI); - } - - - BasicBlock *EntryBB = F.begin(); - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be added to the global context list. - unsigned Align = 4; // FIXME: Should be a TLI check? - AllocaInst *FunctionContext = - new AllocaInst(FunctionContextTy, 0, Align, - "fcn_context", F.begin()->begin()); - - Value *Idxs[2]; - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - // We need to also keep around a reference to the call_site field - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site", - EntryBB->getTerminator()); - - // The exception selector comes back in context->data[1] - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exc_selector_gep", - EntryBB->getTerminator()); - // The exception value comes back in context->data[0] - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The result of the eh.selector call will be replaced with a a reference to - // the selector value returned in the function context. We leave the selector - // itself so the EH analysis later can use it. - for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { - CallInst *I = EH_Selectors[i]; - Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); - I->replaceAllUsesWith(SelectorVal); - } - - // eh.exception calls are replaced with references to the proper location in - // the context. Unlike eh.selector, the eh.exception calls are removed - // entirely. - for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { - CallInst *I = EH_Exceptions[i]; - // Possible for there to be duplicates, so check to make sure the - // instruction hasn't already been removed. - if (!I->getParent()) continue; - Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); - - I->replaceAllUsesWith(Val); - I->eraseFromParent(); - } - - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr); - - // The entry block changes to have the eh.sjlj.setjmp, with a conditional - // branch to a dispatch block for non-zero returns. If we return normally, - // we're not handling an exception and just register the function context and - // continue. - - // Create the dispatch block. 
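The GEP indices used when filling the context (call_site at 1, __data at 2 with the exception in [0] and the selector in [1], personality at 3, LSDA at 4, jbuf at 5) imply a specific record layout. A plain C++ mock of that layout, reconstructed only from the indices in this pass rather than from any runtime header, so treat the field names as illustrative:

    #include <cstdint>

    struct SjLjFunctionContext {
      SjLjFunctionContext *prev; // [0] not touched here (runtime-owned link)
      uint32_t  call_site;       // [1] which invoke (1-based) reached the handler
      uintptr_t data[4];         // [2] data[0] = exception, data[1] = selector
      void     *personality;     // [3] personality routine for this function
      void     *lsda;            // [4] language-specific data area
      void     *jbuf[5];         // [5] jmp_buf: FP saved at [0], SP at [2],
                                 //     the rest filled by eh.sjlj.setjmp
    };

The register/unregister calls inserted later hand the address of this block to the runtime, which is why it has to live in an alloca rather than in registers.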
The dispatch block is basically a big switch - // statement that goes to all of the invoke landing pads. - BasicBlock *DispatchBlock = - BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we issue a trap statement. - BasicBlock *TrapBlock = - BasicBlock::Create(F.getContext(), "trapbb", &F); - CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), - "", TrapBlock); - new UnreachableInst(F.getContext(), TrapBlock); - - Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, - DispatchBlock); - SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), - DispatchBlock); - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "eh.sjlj.setjmp.cont"); - - // Populate the Function Context - // 1. LSDA address - // 2. Personality function address - // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) - - // LSDA address - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); - - Idxs[1] = ConstantInt::get(Int32Ty, 3); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); - - // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); - - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); - - // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); - - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); - - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "", - EntryBB->getTerminator()); - - // Add a call to dispatch_setup after the setjmp call. This is expanded to any - // target-specific setup that needs to be done. - CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator()); - - // check the return value of the setjmp. non-zero goes to dispatcher. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, DispatchVal, Zero, - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. 
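The dispatch block plus the conditional branch on the setjmp result give the entry block the shape of a classic setjmp/longjmp handler: a zero return means normal entry, a non-zero return means the runtime jumped back with a call-site number to switch on. A minimal runnable analogue using standard setjmp/longjmp (not the eh.sjlj intrinsics; all names invented for the example):

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf jbuf;
    static int call_site;                 // stands in for fcn_context.call_site

    static void runtime_throw(int site) { // the unwinder "returning" to dispatch
      call_site = site;
      std::longjmp(jbuf, 1);
    }

    int main() {
      if (setjmp(jbuf) == 0) {
        // Normal path: register the context, then run the function body.
        runtime_throw(1);                 // an invoke whose callee throws
      } else {
        // Dispatch block: switch on call_site, trap on anything unexpected.
        switch (call_site) {
        case 1:  std::puts("landing pad for invoke #1"); break;
        default: std::puts("trapbb: unexpected call site"); break;
        }
      }
      return 0;
    }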
- BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); - - // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FunctionContext, "", - ContBlock->getTerminator()); - Register->setDoesNotThrow(); - - // At this point, we are all set up, update the invoke instructions to mark - // their call_site values, and fill in the dispatch switch accordingly. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - - // Mark call instructions that aren't nounwind as no-action (call_site == - // -1). Skip the entry block, as prior to then, no function context has been - // created for this function and any unexpected exceptions thrown will go - // directly to the caller's context, which is what we want anyway, so no need - // to do anything here. - for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast(I)) { - // Ignore calls to the EH builtins (eh.selector, eh.exception) - Constant *Callee = CI->getCalledFunction(); - if (Callee != SelectorFn && Callee != ExceptionFn - && !CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); - } else if (ResumeInst *RI = dyn_cast(I)) { - insertCallSiteStore(RI, -1, CallSite); - } - } - - // Replace all unwinds with a branch to the unwind handler. - // ??? Should this ever happen with sjlj exceptions? - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(TrapBlock, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - - // Following any allocas not in the entry block, update the saved SP in the - // jmpbuf to the new value. - for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { - Instruction *AI = JmpbufUpdatePoints[i]; - Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(AI); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); - } - - // Finally, for any returns from this function, if this function contains an - // invoke, add a call to unregister the function context. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) - CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); - - return true; + LPI->replaceAllUsesWith(LPadVal); } /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPass:: +Value *SjLjEHPrepare:: setupFunctionContext(Function &F, ArrayRef LPads) { BasicBlock *EntryBB = F.begin(); @@ -721,56 +192,42 @@ setupFunctionContext(Function &F, ArrayRef LPads) { // because the value needs to be added to the global context list. unsigned Align = TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy); - AllocaInst *FuncCtx = + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Value *Idxs[2]; Type *Int32Ty = Type::getInt32Ty(F.getContext()); Value *Zero = ConstantInt::get(Int32Ty, 0); Value *One = ConstantInt::get(Int32Ty, 1); + Value *Two = ConstantInt::get(Int32Ty, 2); + Value *Three = ConstantInt::get(Int32Ty, 3); + Value *Four = ConstantInt::get(Int32Ty, 4); - // Keep around a reference to the call_site field. 
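The call-site numbering follows one rule: each invoke stores its own 1-based index before the call so the dispatch switch can route to its landing pad, while an ordinary call that may throw stores -1, meaning there is no landing pad at that site and unwinding should bypass this frame's pads. A toy encoding of that rule (the struct and helper below are made up for illustration; nounwind calls are simply skipped, shown here as a 0):

    #include <cassert>

    struct CallDesc { bool isInvoke; bool nounwind; };

    // Value written into the context's call_site field before the call.
    int callSiteValue(const CallDesc &c, int invokeIndex /*1-based*/) {
      if (c.isInvoke)
        return invokeIndex;   // dispatch switch selects this invoke's landing pad
      if (!c.nounwind)
        return -1;            // may throw, but no landing pad at this site
      return 0;               // nounwind: nothing is stored at all
    }

    int main() {
      assert(callSiteValue({true,  false}, 3) == 3);
      assert(callSiteValue({false, false}, 0) == -1);
      assert(callSiteValue({false, true }, 0) == 0);
      return 0;
    }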
- Idxs[0] = Zero; - Idxs[1] = One; - CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site", - EntryBB->getTerminator()); - - // Reference the __data field. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data", - EntryBB->getTerminator()); - - // The exception value comes back in context->__data[0]. - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The exception selector comes back in context->__data[1]. - Idxs[1] = One; - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exn_selector_gep", - EntryBB->getTerminator()); + Value *Idxs[2] = { Zero, 0 }; for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + // Reference the __data field. + Idxs[1] = Two; + Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + + // The exception values come back in context->__data[0]. + Idxs[1] = Zero; + Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); + + Idxs[1] = One; + Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); - Type *LPadType = LPI->getType(); - Value *LPadVal = UndefValue::get(LPadType); - LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); - LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); - - LPI->replaceAllUsesWith(LPadVal); + substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = ConstantInt::get(Int32Ty, 3); + Idxs[1] = Three; if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); Value *PersonalityFieldPtr = @@ -780,11 +237,11 @@ setupFunctionContext(Function &F, ArrayRef LPads) { EntryBB->getTerminator()); // LSDA address - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", EntryBB->getTerminator()); + Idxs[1] = Four; + Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", + EntryBB->getTerminator()); new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); return FuncCtx; @@ -794,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef LPads) { /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. -void SjLjEHPass::lowerIncomingArguments(Function &F) { +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa(AfterAllocaInsPt) && isa(cast(AfterAllocaInsPt)->getArraySize())) @@ -838,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. -void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, - ArrayRef Invokes) { +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef Invokes) { // Finally, scan the code looking for instructions with bad live ranges. 
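Per landing pad, the rewritten setupFunctionContext does one mechanical thing: load __data[0] and __data[1] out of the function context, cast the first back to a pointer, and hand the pair to whatever used the landingpad value. A standalone analogue of those two loads, with the __data layout assumed as in the earlier sketch:

    #include <cstdint>
    #include <utility>

    struct FuncCtxData { uintptr_t data[4]; };   // the __data field only

    // __data[0] holds the exception object as an integer (hence the inttoptr),
    // __data[1] holds the selector computed by the personality routine.
    std::pair<void*, int> landingPadValue(const FuncCtxData &ctx) {
      void *exn = reinterpret_cast<void*>(ctx.data[0]);
      int   sel = static_cast<int>(ctx.data[1]);
      return {exn, sel};
    }

    int main() {
      FuncCtxData ctx = {{0, 7, 0, 0}};
      return landingPadValue(ctx).second == 7 ? 0 : 1;
    }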
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { @@ -870,7 +327,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, } // Find all of the blocks that this value is live in. - std::set LiveBBs; + SmallPtrSet LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -893,7 +350,10 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " + << UnwindBlock->getName() << "\n"); NeedsSpill = true; + break; } } @@ -902,36 +362,60 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { - ++NumSpilled; DemoteRegToStack(*Inst, true); + ++NumSpilled; } } } + + // Go through the landing pads and remove any PHIs there. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); + + // Place PHIs into a set to avoid invalidating the iterator. + SmallPtrSet PHIsToDemote; + for (BasicBlock::iterator + PN = UnwindBlock->begin(); isa(PN); ++PN) + PHIsToDemote.insert(cast(PN)); + if (PHIsToDemote.empty()) continue; + + // Demote the PHIs to the stack. + for (SmallPtrSet::iterator + I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + DemotePHIToStack(*I); + + // Move the landingpad instruction back to the top of the landing pad block. + LPI->moveBefore(UnwindBlock->begin()); + } } /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. -bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { +bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector Returns; SmallVector Invokes; - SmallVector LPads; + SmallSetVector LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast(BB->getTerminator())) { Invokes.push_back(II); - LPads.push_back(II->getUnwindDest()->getLandingPadInst()); + LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { Returns.push_back(RI); } if (Invokes.empty()) return false; + NumInvokes += Invokes.size(); + lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); - Value *FuncCtx = setupFunctionContext(F, LPads); + Value *FuncCtx = + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); Type *Int32Ty = Type::getInt32Ty(F.getContext()); @@ -979,7 +463,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { // At this point, we are all set up, update the invoke instructions to mark // their call_site values. 
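Demoting the landing-pad PHIs to the stack turns SSA merges into memory traffic: every predecessor stores its incoming value into a dedicated slot and the old PHI becomes a load, which is what an abnormal setjmp-style entry into the pad needs. A scalar, source-level analogue of what DemotePHIToStack achieves (a sketch of the effect, not the utility itself):

    #include <cstdio>

    // Before: the merge point computes "phi = came_from_a ? a_val : b_val".
    // After : each predecessor writes the slot; the merge point only loads it.
    int mergedValue(bool came_from_a) {
      int slot;           // the stack slot the PHI was demoted to
      if (came_from_a)
        slot = 1;         // store inserted in predecessor A
      else
        slot = 2;         // store inserted in predecessor B
      return slot;        // the load that replaces the PHI
    }

    int main() {
      std::printf("%d %d\n", mergedValue(true), mergedValue(false));
      return 0;
    }

The landingpad instruction is then moved back to the top of its block because the demotion inserts loads ahead of it.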
for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { - insertCallSiteStore(Invokes[I], I + 1, CallSite); + insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); @@ -998,9 +482,9 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) if (CallInst *CI = dyn_cast(I)) { if (!CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); + insertCallSiteStore(CI, -1); } else if (ResumeInst *RI = dyn_cast(I)) { - insertCallSiteStore(RI, -1, CallSite); + insertCallSiteStore(RI, -1); } // Register the function context and make sure it's known to not throw @@ -1008,6 +492,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { EntryBB->getTerminator()); Register->setDoesNotThrow(); + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (BB == F.begin()) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast(I)) { + if (CI->getCalledFunction() != StackRestoreFn) + continue; + } else if (!isa(I)) { + continue; + } + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(I); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) @@ -1016,11 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { return true; } -bool SjLjEHPass::runOnFunction(Function &F) { - bool Res = false; - if (!DisableOldSjLjEH) - Res = insertSjLjEHSupport(F); - else - Res = setupEntryBlockAndCallSites(F); +bool SjLjEHPrepare::runOnFunction(Function &F) { + bool Res = setupEntryBlockAndCallSites(F); return Res; } diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index ca79cafcf4be..c5bd3a3cae63 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. - SlotIndex blockStartIndex(back(), SlotIndex::LOAD); + SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. 
- mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + SlotIndex::Slot_Block))); ++functionSize; } @@ -97,14 +98,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } // Sort the Idx2MBBMap std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); - DEBUG(dump()); + DEBUG(mf->print(dbgs(), this)); // And we're done! return false; @@ -166,7 +168,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "LudS"[getSlot()]; + os << entry().getIndex() << "Berd"[getSlot()]; else os << "invalid"; } diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp index b6bbcd7176dd..4cd22eb60f55 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.cpp +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -11,8 +11,8 @@ #include "Spiller.h" #include "VirtRegMap.h" -#include "LiveRangeEdit.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, inline_ }; + enum SpillerName { trivial, inline_ }; } static cl::opt @@ -37,10 +37,9 @@ spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), - cl::init(standard)); + cl::init(trivial)); // Spiller virtual destructor implementation. Spiller::~Spiller() {} @@ -73,8 +72,9 @@ class SpillerBase : public Spiller { /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. - void trivialSpillEverywhere(LiveInterval *li, - SmallVectorImpl &newIntervals) { + void trivialSpillEverywhere(LiveRangeEdit& LRE) { + LiveInterval* li = &LRE.getParent(); + DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -116,17 +116,14 @@ class SpillerBase : public Spiller { } // Create a new vreg & interval for this instr. - unsigned newVReg = mri->createVirtualRegister(trc); - vrm->grow(); - vrm->assignVirt2StackSlot(newVReg, ss); - LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + LiveInterval *newLI = &LRE.create(); newLI->weight = HUGE_VALF; // Update the reg operands & kill flags. for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(newVReg); + mop.setReg(newLI->reg); if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { mop.setIsKill(true); } @@ -136,33 +133,29 @@ class SpillerBase : public Spiller { // Insert reload if necessary. 
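The switch from "LudS" to "Berd" in SlotIndex::print reflects the renamed slot kinds: each instruction number now carries Block, EarlyClobber, Register and Dead slots in place of the old LOAD/USE/DEF/STORE set (the new names are suggested by Slot_Block here and the getRegSlot/getDeadSlot calls elsewhere in this patch), and print appends one letter per kind. A tiny mock of that formatting:

    #include <cstdio>

    // One letter per slot kind, in the order the index encodes them.
    enum Slot { Slot_Block, Slot_EarlyClobber, Slot_Register, Slot_Dead };

    void printIndex(unsigned index, Slot s) {
      std::printf("%u%c\n", index, "Berd"[s]);
    }

    int main() {
      printIndex(16, Slot_Register);   // prints "16r"
      return 0;
    }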
MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc, + tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc, tri); MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, loadInstr); + lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = - newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); + newLI->getNextValue(loadIndex, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg, true, ss, trc, tri); MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, storeInstr); + lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = - newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); + newLI->getNextValue(beginIndex, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } - - newIntervals.push_back(newLI); } } }; @@ -182,60 +175,20 @@ class TrivialSpiller : public SpillerBase { void spill(LiveRangeEdit &LRE) { // Ignore spillIs - we don't use it. - trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs()); + trivialSpillEverywhere(LRE); } }; } // end anonymous namespace -namespace { - -/// Falls back on LiveIntervals::addIntervalsForSpills. -class StandardSpiller : public Spiller { -protected: - MachineFunction *mf; - LiveIntervals *lis; - LiveStacks *lss; - MachineLoopInfo *loopInfo; - VirtRegMap *vrm; -public: - StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : mf(&mf), - lis(&pass.getAnalysis()), - lss(&pass.getAnalysis()), - loopInfo(pass.getAnalysisIfAvailable()), - vrm(&vrm) {} - - /// Falls back on LiveIntervals::addIntervalsForSpills. - void spill(LiveRangeEdit &LRE) { - std::vector added = - lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), - loopInfo, *vrm); - LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), - added.begin(), added.end()); - - // Update LiveStacks. 
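trivialSpillEverywhere is the bluntest spilling strategy: one stack slot for the interval, a reload immediately before every use and a store immediately after every def, each through a fresh short-lived register. At the source level the effect looks roughly like this (illustrative only):

    #include <cstdio>

    static int slot;                         // the stack slot assigned to the value

    static int  compute()  { return 42; }
    static void use(int v) { std::printf("%d\n", v); }

    int main() {
      { int tmp = compute(); slot = tmp; }   // def: store right after the def
      { int tmp = slot; use(tmp); }          // use: reload right before the use
      { int tmp = slot; use(tmp); }          // every use pays for its own reload
      return 0;
    }

The rewrite in this hunk keeps that behaviour but lets LiveRangeEdit create the new interval and stack slot instead of talking to VirtRegMap directly.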
- int SS = vrm->getStackSlot(LRE.getReg()); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); - LiveInterval &SI = lss->getOrCreateInterval(SS, RC); - if (!SI.hasAtLeastOneValue()) - SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); - } -}; - -} // end anonymous namespace +void Spiller::anchor() { } llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { switch (spillerOpt) { - default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); - case standard: return new StandardSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } + llvm_unreachable("Invalid spiller optimization"); } diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h index 41f1727da439..b7d5beaab1b2 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.h +++ b/contrib/llvm/lib/CodeGen/Spiller.h @@ -22,6 +22,7 @@ namespace llvm { /// Implementations are utility classes which insert spill or remat code on /// demand. class Spiller { + virtual void anchor(); public: virtual ~Spiller() = 0; diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 63627800af69..9959f74d5f27 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "regalloc" #include "SplitKit.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -62,13 +62,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); std::pair &LSP = LastSplitPoint[Num]; + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); // Compute split points on the first call. The pair is independent of the // current live interval. if (!LSP.first.isValid()) { MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); if (FirstTerm == MBB->end()) - LSP.first = LIS.getMBBEndIdx(MBB); + LSP.first = MBBEnd; else LSP.first = LIS.getInstructionIndex(FirstTerm); @@ -80,7 +81,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); I != E;) { --I; - if (I->getDesc().isCall()) { + if (I->isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } @@ -89,10 +90,32 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { // If CurLI is live into a landing pad successor, move the last split point // back to the call that may throw. - if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad)) - return LSP.second; - else + if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad)) return LSP.first; + + // Find the value leaving MBB. + const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd); + if (!VNI) + return LSP.first; + + // If the value leaving MBB was defined after the call in MBB, it can't + // really be live-in to the landing pad. This can happen if the landing pad + // has a PHI, and this register is undef on the exceptional edge. 
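With StandardSpiller gone, Spiller is a pure interface, and the new empty Spiller::anchor() is the usual trick for such classes: declare one ordinary virtual method in the header and define it in exactly one .cpp file, so the vtable (and type info) gets a home translation unit instead of every includer emitting a weak copy. The idiom in isolation, simplified to a single file:

    // In the header: declare, but do not define, one out-of-line virtual.
    class Spiller {
      virtual void anchor();
    public:
      virtual ~Spiller() {}
      virtual void spill() = 0;
    };

    // In exactly one .cpp file: the definition that anchors the vtable.
    void Spiller::anchor() {}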
+ // + if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd) + return LSP.first; + + // Value is properly live-in to the landing pad. + // Only allow splits before the call. + return LSP.second; +} + +MachineBasicBlock::iterator +SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) { + SlotIndex LSP = getLastSplitPoint(MBB->getNumber()); + if (LSP == LIS.getMBBEndIdx(MBB)) + return MBB->end(); + return LIS.getInstructionFromIndex(LSP); } /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. @@ -112,7 +135,7 @@ void SplitAnalysis::analyzeUses() { I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; ++I) if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -328,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(LIS, TII, 0); + Edit->anyRematerializable(0); } void SplitEditor::dump() const { @@ -351,7 +374,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, LiveInterval *LI = Edit->get(RegIdx); // Create a new value. - VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); // Use insert for lookup, so we can add missing values with a second lookup. std::pair InsP = @@ -366,14 +389,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -393,7 +416,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -413,33 +436,31 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); - if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); + if (Edit->canRematerializeAt(RM, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); ++NumRemats; } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) - .getDefIndex(); + .getRegSlot(); ++NumCopies; } // Define the value in Reg. - VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); - VNI->setCopy(CopyMI); - return VNI; + return defValue(RegIdx, ParentVNI, Def); } /// Create a new virtual register and live interval. 
unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(LIS, VRM); + Edit->create(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(LIS, VRM); + Edit->create(); return OpenIdx; } @@ -497,7 +518,7 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { } DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, - LIS.getLastSplitPoint(Edit->getParent(), &MBB)); + SA.getLastSplitPointIter(&MBB)); RegAssign.insert(VNI->def, End, OpenIdx); DEBUG(dump()); return VNI->def; @@ -586,7 +607,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); - assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && "Parent changes value in extended range"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); @@ -640,7 +661,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl &Copies) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex(); + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -780,7 +801,7 @@ void SplitEditor::hoistCopiesForSize() { SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); Dom.second = defFromParent(0, ParentVNI, Last, *Dom.first, - LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def; + SA.getLastSplitPointIter(Dom.first))->def; } // Remove redundant back-copies that are now known to be dominated by another @@ -958,7 +979,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // use the same register as the def, so just do that always. SlotIndex Idx = LIS.getInstructionIndex(MI); if (MO.isDef() || MO.isUndef()) - Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isEarlyClobber()); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); @@ -981,7 +1002,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (!Edit->getParent().liveAt(Idx)) continue; } else - Idx = Idx.getUseIndex(); + Idx = Idx.getRegSlot(true); getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); @@ -994,8 +1015,8 @@ void SplitEditor::deleteRematVictims() { LiveInterval *LI = *I; for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { - // Dead defs end at the store slot. - if (LII->end != LII->valno->def.getNextSlot()) + // Dead defs end at the dead slot. 
+ if (LII->end != LII->valno->def.getDeadSlot()) continue; MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); assert(MI && "Missing instruction for dead def"); @@ -1012,7 +1033,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); + Edit->eliminateDeadDefs(Dead); } void SplitEditor::finish(SmallVectorImpl *LRMap) { @@ -1030,7 +1051,6 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { unsigned RegIdx = RegAssign.lookup(ParentVNI->def); VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); VNI->setIsPHIDef(ParentVNI->isPHIDef()); - VNI->setCopy(ParentVNI->getCopy()); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. @@ -1049,7 +1069,6 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { break; case SM_Speed: llvm_unreachable("Spill mode 'speed' not implemented yet"); - break; } // Transfer the simply mapped values, check if any are skipped. @@ -1089,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { SmallVector dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create(LIS, VRM)); + dups.push_back(&Edit->create()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) @@ -1097,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index d8fc2122a3c7..4005a3d5cbbf 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -46,9 +46,6 @@ class SplitAnalysis { const MachineLoopInfo &Loops; const TargetInstrInfo &TII; - // Sorted slot indexes of using instructions. - SmallVector UseSlots; - /// Additional information about basic blocks where the current variable is /// live. Such a block will look like one of these templates: /// @@ -85,6 +82,9 @@ class SplitAnalysis { // Current live interval. const LiveInterval *CurLI; + // Sorted slot indexes of using instructions. + SmallVector UseSlots; + /// LastSplitPoint - Last legal split point in each basic block in the current /// function. The first entry is the first terminator, the second entry is the /// last valid split point for a variable that is live in to a landing pad @@ -135,7 +135,7 @@ class SplitAnalysis { /// getParent - Return the last analyzed interval. const LiveInterval &getParent() const { return *CurLI; } - /// getLastSplitPoint - Return that base index of the last valid split point + /// getLastSplitPoint - Return the base index of the last valid split point /// in the basic block numbered Num. SlotIndex getLastSplitPoint(unsigned Num) { // Inline the common simple case. @@ -145,6 +145,9 @@ class SplitAnalysis { return computeLastSplitPoint(Num); } + /// getLastSplitPointIter - Returns the last split point as an iterator. + MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*); + /// isOriginalEndpoint - Return true if the original live range was killed or /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, /// and 'use' for an early-clobber def. @@ -152,6 +155,10 @@ class SplitAnalysis { /// splitting. 
bool isOriginalEndpoint(SlotIndex Idx) const; + /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI. + /// This include both use and def operands, at most one entry per instruction. + ArrayRef getUseSlots() const { return UseSlots; } + /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks /// where CurLI has uses. ArrayRef getUseBlocks() const { return UseBlocks; } diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp deleted file mode 100644 index 77973b72bbc8..000000000000 --- a/contrib/llvm/lib/CodeGen/Splitter.cpp +++ /dev/null @@ -1,827 +0,0 @@ -//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loopsplitter" - -#include "Splitter.h" - -#include "llvm/Module.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -char LoopSplitter::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) - -namespace llvm { - - class StartSlotComparator { - public: - StartSlotComparator(LiveIntervals &lis) : lis(lis) {} - bool operator()(const MachineBasicBlock *mbb1, - const MachineBasicBlock *mbb2) const { - return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2); - } - private: - LiveIntervals &lis; - }; - - class LoopSplit { - public: - LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop) - : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Cannot split physical registers."); - } - - LiveInterval& getLI() const { return li; } - - MachineLoop& getLoop() const { return loop; } - - bool isValid() const { return valid; } - - bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); } - - void invalidate() { valid = false; } - - void splitIncoming() { inSplit = true; } - - void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); } - - void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); } - - void apply() { - assert(valid && "Attempt to apply invalid split."); - applyIncoming(); - applyOutgoing(); - copyRanges(); - renameInside(); - } - - private: - LoopSplitter &ls; - LiveInterval &li; - MachineLoop &loop; - bool valid, inSplit; - std::set outSplits; - std::vector loopInstrs; - - LiveInterval *newLI; - std::map vniMap; - - LiveInterval* getNewLI() { - if (newLI == 0) { - const TargetRegisterClass *trc = 
ls.mri->getRegClass(li.reg); - unsigned vreg = ls.mri->createVirtualRegister(trc); - newLI = &ls.lis->getOrCreateInterval(vreg); - } - return newLI; - } - - VNInfo* getNewVNI(VNInfo *oldVNI) { - VNInfo *newVNI = vniMap[oldVNI]; - - if (newVNI == 0) { - newVNI = getNewLI()->createValueCopy(oldVNI, - ls.lis->getVNInfoAllocator()); - vniMap[oldVNI] = newVNI; - } - - return newVNI; - } - - void applyIncoming() { - if (!inSplit) { - return; - } - - MachineBasicBlock *preHeader = loop.getLoopPreheader(); - if (preHeader == 0) { - assert(ls.canInsertPreHeader(loop) && - "Can't insert required preheader."); - preHeader = &ls.insertPreHeader(loop); - } - - LiveRange *preHeaderRange = - ls.lis->findExitingRange(li, preHeader); - assert(preHeaderRange != 0 && "Range not live into preheader."); - - // Insert the new copy. - MachineInstr *copy = BuildMI(*preHeader, - preHeader->getFirstTerminator(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(getNewLI()->reg, RegState::Define) - .addReg(li.reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); - - VNInfo *newVal = getNewVNI(preHeaderRange->valno); - newVal->def = copyDefIdx; - newVal->setCopy(copy); - li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); - - getNewLI()->addRange(LiveRange(copyDefIdx, - ls.lis->getMBBEndIdx(preHeader), - newVal)); - } - - void applyOutgoing() { - - for (std::set::iterator osItr = outSplits.begin(), - osEnd = outSplits.end(); - osItr != osEnd; ++osItr) { - MachineLoop::Edge edge = *osItr; - MachineBasicBlock *outBlock = edge.second; - if (ls.isCriticalEdge(edge)) { - assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); - outBlock = &ls.splitEdge(edge, loop); - } - LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); - assert(outRange != 0 && "No exiting range?"); - - MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(li.reg, RegState::Define) - .addReg(getNewLI()->reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); - - // Blow away output range definition. - outRange->valno->def = ls.lis->getInvalidIndex(); - li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); - - SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); - assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = - getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); - - getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), - copyDefIdx, newVal)); - - } - } - - void copyRange(LiveRange &lr) { - std::pair lsr = - ls.getLoopSubRange(lr, loop); - - if (!lsr.first) - return; - - LiveRange loopRange(lsr.second.first, lsr.second.second, - getNewVNI(lr.valno)); - - li.removeRange(loopRange.start, loopRange.end, true); - - getNewLI()->addRange(loopRange); - } - - void copyRanges() { - for (std::vector::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); - if (instr.modifiesRegister(li.reg, 0)) { - LiveRange *defRange = - li.getLiveRangeContaining(instrIdx.getDefIndex()); - if (defRange != 0) // May have caught this already. 
- copyRange(*defRange); - } - if (instr.readsRegister(li.reg, 0)) { - LiveRange *useRange = - li.getLiveRangeContaining(instrIdx.getUseIndex()); - if (useRange != 0) { // May have caught this already. - copyRange(*useRange); - } - } - } - - for (MachineLoop::block_iterator bbItr = loop.block_begin(), - bbEnd = loop.block_end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock &loopBlock = **bbItr; - LiveRange *enteringRange = - ls.lis->findEnteringRange(li, &loopBlock); - if (enteringRange != 0) { - copyRange(*enteringRange); - } - } - } - - void renameInside() { - for (std::vector::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - for (unsigned i = 0; i < instr.getNumOperands(); ++i) { - MachineOperand &mop = instr.getOperand(i); - if (mop.isReg() && mop.getReg() == li.reg) { - mop.setReg(getNewLI()->reg); - } - } - } - } - - }; - - void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired(); - au.addPreserved(); - au.addRequired(); - au.addPreserved(); - au.addPreservedID(RegisterCoalescerPassID); - au.addPreserved(); - au.addPreserved(); - au.addRequired(); - au.addPreserved(); - au.addRequired(); - au.addPreserved(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); - sis = &getAnalysis(); - lis = &getAnalysis(); - mli = &getAnalysis(); - mdt = &getAnalysis(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + - mf->getFunction()->getName().str(); - - dbgs() << "Splitting " << mf->getFunction()->getName() << "."; - - dumpOddTerminators(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - -// std::deque loops; -// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); -// dbgs() << "Loops:\n"; -// while (!loops.empty()) { -// MachineLoop &loop = *loops.front(); -// loops.pop_front(); -// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - -// dumpLoopInfo(loop); -// } - - //lis->dump(); - //exit(0); - - // Setup initial intervals. 
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isVirtualRegister(li->reg) && - !lis->intervalIsInOneMBB(*li)) { - intervals.push_back(li); - } - } - - processIntervals(); - - intervals.clear(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - - dumpOddTerminators(); - - //exit(1); - - return false; - } - - void LoopSplitter::releaseMemory() { - fqn.clear(); - intervals.clear(); - loopRangeMap.clear(); - } - - void LoopSplitter::dumpOddTerminators() { - for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock *mbb = &*bbItr; - MachineBasicBlock *a = 0, *b = 0; - SmallVector c; - if (tii->AnalyzeBranch(*mbb, a, b, c)) { - dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; - dbgs() << " Terminators:\n"; - for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); - iItr != iEnd; ++iItr) { - MachineInstr *instr= &*iItr; - dbgs() << " " << *instr << ""; - } - dbgs() << "\n Listed successors: [ "; - for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); - sItr != sEnd; ++sItr) { - MachineBasicBlock *succMBB = *sItr; - dbgs() << succMBB->getNumber() << " "; - } - dbgs() << "]\n\n"; - } - } - } - - void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { - MachineBasicBlock &headerBlock = *loop.getHeader(); - typedef SmallVector ExitEdgesList; - ExitEdgesList exitEdges; - loop.getExitEdges(exitEdges); - - dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; - for (std::vector::const_iterator - subBlockItr = loop.getBlocks().begin(), - subBlockEnd = loop.getBlocks().end(); - subBlockItr != subBlockEnd; ++subBlockItr) { - MachineBasicBlock &subBlock = **subBlockItr; - dbgs() << "BB#" << subBlock.getNumber() << " "; - } - dbgs() << "], Exit edges: [ "; - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge &exitEdge = *exitEdgeItr; - dbgs() << "(MBB#" << exitEdge.first->getNumber() - << ", MBB#" << exitEdge.second->getNumber() << ") "; - } - dbgs() << "], Sub-Loop Headers: [ "; - for (MachineLoop::iterator subLoopItr = loop.begin(), - subLoopEnd = loop.end(); - subLoopItr != subLoopEnd; ++subLoopItr) { - MachineLoop &subLoop = **subLoopItr; - MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); - dbgs() << "BB#" << subLoopBlock.getNumber() << " "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { - mbb.updateTerminator(); - - for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); - miItr != miEnd; ++miItr) { - if (lis->isNotInMIMap(miItr)) { - lis->InsertMachineInstrInMaps(miItr); - } - } - } - - bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { - MachineBasicBlock *header = loop.getHeader(); - MachineBasicBlock *a = 0, *b = 0; - SmallVector c; - - for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), - pbEnd = header->pred_end(); - pbItr != pbEnd; ++pbItr) { - MachineBasicBlock *predBlock = *pbItr; - if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { - return false; - } - } - - MachineFunction::iterator headerItr(header); - if (headerItr == mf->begin()) - return true; - MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); - assert(headerLayoutPred 
!= 0 && "Header should have layout pred."); - - return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { - assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); - - MachineBasicBlock &header = *loop.getHeader(); - - // Save the preds - we'll need to update them once we insert the preheader. - typedef std::set HeaderPreds; - HeaderPreds headerPreds; - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (!loop.contains(*predItr)) - headerPreds.insert(*predItr); - } - - assert(!headerPreds.empty() && "No predecessors for header?"); - - //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; - - MachineBasicBlock *preHeader = - mf->CreateMachineBasicBlock(header.getBasicBlock()); - - assert(preHeader != 0 && "Failed to create pre-header."); - - mf->insert(header, preHeader); - - for (HeaderPreds::iterator hpItr = headerPreds.begin(), - hpEnd = headerPreds.end(); - hpItr != hpEnd; ++hpItr) { - assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); - MachineBasicBlock &hp = **hpItr; - hp.ReplaceUsesOfBlockWith(&header, preHeader); - } - preHeader->addSuccessor(&header); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(preHeader)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(preHeader); - - if (MachineLoop *parentLoop = loop.getParentLoop()) { - assert(parentLoop->getHeader() != loop.getHeader() && - "Parent loop has same header?"); - parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); - - // Invalidate all parent loop ranges. - while (parentLoop != 0) { - loopRangeMap.erase(parentLoop); - parentLoop = parentLoop->getParentLoop(); - } - } - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - - // Is this safe for physregs? - // TargetRegisterInfo::isPhysicalRegister(li.reg) || - if (!lis->isLiveInToMBB(li, &header)) - continue; - - if (lis->isLiveInToMBB(li, preHeader)) { - assert(lis->isLiveOutOfMBB(li, preHeader) && - "Range terminates in newly added preheader?"); - continue; - } - - bool insertRange = false; - - for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), - predEnd = preHeader->pred_end(); - predItr != predEnd; ++predItr) { - MachineBasicBlock *predMBB = *predItr; - if (lis->isLiveOutOfMBB(li, predMBB)) { - insertRange = true; - break; - } - } - - if (!insertRange) - continue; - - SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), - lis->getMBBEndIdx(preHeader), - newVal)); - } - - - //dbgs() << "Dumping SlotIndexes:\n"; - //sis->dump(); - - //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; - - return *preHeader; - } - - bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { - assert(edge.first->succ_size() > 1 && "Non-sensical edge."); - if (edge.second->pred_size() > 1) - return true; - return false; - } - - bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { - MachineFunction::iterator outBlockItr(edge.second); - if (outBlockItr == mf->begin()) - return true; - MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); - assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); - MachineBasicBlock *a = 0, *b = 0; - SmallVector c; - return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && - !tii->AnalyzeBranch(*edge.first, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, - MachineLoop &loop) { - - MachineBasicBlock &inBlock = *edge.first; - MachineBasicBlock &outBlock = *edge.second; - - assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && - "Splitting non-critical edge?"); - - //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() - // << " -> MBB#" << outBlock.getNumber() << ")..."; - - MachineBasicBlock *splitBlock = - mf->CreateMachineBasicBlock(); - - assert(splitBlock != 0 && "Failed to create split block."); - - mf->insert(&outBlock, splitBlock); - - inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); - splitBlock->addSuccessor(&outBlock); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(splitBlock)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(splitBlock); - - loopRangeMap.erase(&loop); - - MachineLoop *splitParentLoop = loop.getParentLoop(); - while (splitParentLoop != 0 && - !splitParentLoop->contains(&outBlock)) { - splitParentLoop = splitParentLoop->getParentLoop(); - } - - if (splitParentLoop != 0) { - assert(splitParentLoop->contains(&loop) && - "Split-block parent doesn't contain original loop?"); - splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); - - // Invalidate all parent loop ranges. - while (splitParentLoop != 0) { - loopRangeMap.erase(splitParentLoop); - splitParentLoop = splitParentLoop->getParentLoop(); - } - } - - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && - lis->isLiveInToMBB(li, &outBlock); - if (lis->isLiveInToMBB(li, splitBlock)) { - if (!intersects) { - li.removeRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), true); - } - } else if (intersects) { - SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, - lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), - newVal)); - } - } - - //dbgs() << "done. 
(Added MBB#" << splitBlock->getNumber() << ")\n"; - - return *splitBlock; - } - - LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { - typedef std::set LoopMBBSet; - LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); - if (lrItr == loopRangeMap.end()) { - LoopMBBSet loopMBBs((StartSlotComparator(*lis))); - std::copy(loop.block_begin(), loop.block_end(), - std::inserter(loopMBBs, loopMBBs.begin())); - - assert(!loopMBBs.empty() && "No blocks in loop?"); - - LoopRanges &loopRanges = loopRangeMap[&loop]; - assert(loopRanges.empty() && "Loop encountered but not processed?"); - SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); - loopRanges.push_back( - std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), - lis->getInvalidIndex())); - for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), - curBlockEnd = loopMBBs.end(); - curBlockItr != curBlockEnd; ++curBlockItr) { - SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); - if (newStart != oldEnd) { - loopRanges.back().second = oldEnd; - loopRanges.push_back(std::make_pair(newStart, - lis->getInvalidIndex())); - } - oldEnd = lis->getMBBEndIdx(*curBlockItr); - } - - loopRanges.back().second = - lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); - - return loopRanges; - } - return lrItr->second; - } - - std::pair LoopSplitter::getLoopSubRange( - const LiveRange &lr, - MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - LoopRanges::iterator lrItr = loopRanges.begin(), - lrEnd = loopRanges.end(); - while (lrItr != lrEnd && lr.start >= lrItr->second) { - ++lrItr; - } - - if (lrItr == lrEnd) { - SlotIndex invalid = lis->getInvalidIndex(); - return std::make_pair(false, SlotPair(invalid, invalid)); - } - - SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); - SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end); - - return std::make_pair(true, SlotPair(srStart, srEnd)); - } - - void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; - for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); - lrItr != lrEnd; ++lrItr) { - dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::processHeader(LoopSplit &split) { - MachineBasicBlock &header = *split.getLoop().getHeader(); - //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; - - if (!lis->isLiveInToMBB(split.getLI(), &header)) - return; // Not live in, but nothing wrong so far. - - MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); - if (!preHeader) { - - if (!canInsertPreHeader(split.getLoop())) { - split.invalidate(); - return; // Couldn't insert a pre-header. Bail on this interval. 
- } - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { - split.splitIncoming(); - break; - } - } - } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { - split.splitIncoming(); - } - } - - void LoopSplitter::processLoopExits(LoopSplit &split) { - typedef SmallVector ExitEdgesList; - ExitEdgesList exitEdges; - split.getLoop().getExitEdges(exitEdges); - - //dbgs() << " Processing loop exits:\n"; - - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge exitEdge = *exitEdgeItr; - - LiveRange *outRange = - split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); - - if (outRange != 0) { - if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { - split.invalidate(); - return; - } - - split.splitOutgoing(exitEdge); - } - } - } - - void LoopSplitter::processLoopUses(LoopSplit &split) { - std::set processed; - - for (MachineRegisterInfo::reg_iterator - rItr = mri->reg_begin(split.getLI().reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - MachineInstr &instr = *rItr; - if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { - split.addLoopInstr(&instr); - processed.insert(&instr); - } - } - - //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg - // << " in blocks [ "; - //dbgs() << "]\n"; - } - - bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Attempt to split physical register."); - - LoopSplit split(*this, li, loop); - processHeader(split); - if (split.isValid()) - processLoopExits(split); - if (split.isValid()) - processLoopUses(split); - if (split.isValid() /* && split.isWorthwhile() */) { - split.apply(); - DEBUG(dbgs() << "Success.\n"); - return true; - } - DEBUG(dbgs() << "Failed.\n"); - return false; - } - - void LoopSplitter::processInterval(LiveInterval &li) { - std::deque loops; - std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); - - while (!loops.empty()) { - MachineLoop &loop = *loops.front(); - loops.pop_front(); - DEBUG( - dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" - << loop.getHeader()->getNumber() << " "; - ); - if (!splitOverLoop(li, loop)) { - // Couldn't split over outer loop, schedule sub-loops to be checked. - std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - } - } - } - - void LoopSplitter::processIntervals() { - while (!intervals.empty()) { - LiveInterval &li = *intervals.front(); - intervals.pop_front(); - - assert(!lis->intervalIsInOneMBB(li) && - "Single interval in process worklist."); - - processInterval(li); - } - } - -} diff --git a/contrib/llvm/lib/CodeGen/Splitter.h b/contrib/llvm/lib/CodeGen/Splitter.h deleted file mode 100644 index 9fb1b8b30139..000000000000 --- a/contrib/llvm/lib/CodeGen/Splitter.h +++ /dev/null @@ -1,101 +0,0 @@ -//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SPLITTER_H -#define LLVM_CODEGEN_SPLITTER_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" - -#include -#include -#include -#include - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - struct LiveRange; - class LoopSplit; - class MachineDominatorTree; - class MachineRegisterInfo; - class SlotIndexes; - class TargetInstrInfo; - class VNInfo; - - class LoopSplitter : public MachineFunctionPass { - friend class LoopSplit; - public: - static char ID; - - LoopSplitter() : MachineFunctionPass(ID) { - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - - private: - - MachineFunction *mf; - LiveIntervals *lis; - MachineLoopInfo *mli; - MachineRegisterInfo *mri; - MachineDominatorTree *mdt; - SlotIndexes *sis; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - - std::string fqn; - std::deque intervals; - - typedef std::pair SlotPair; - typedef std::vector LoopRanges; - typedef std::map LoopRangeMap; - LoopRangeMap loopRangeMap; - - void dumpLoopInfo(MachineLoop &loop); - - void dumpOddTerminators(); - - void updateTerminators(MachineBasicBlock &mbb); - - bool canInsertPreHeader(MachineLoop &loop); - MachineBasicBlock& insertPreHeader(MachineLoop &loop); - - bool isCriticalEdge(MachineLoop::Edge &edge); - bool canSplitEdge(MachineLoop::Edge &edge); - MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); - - LoopRanges& getLoopRanges(MachineLoop &loop); - std::pair getLoopSubRange(const LiveRange &lr, - MachineLoop &loop); - - void dumpLoopRanges(MachineLoop &loop); - - void processHeader(LoopSplit &split); - void processLoopExits(LoopSplit &split); - void processLoopUses(LoopSplit &split); - - bool splitOverLoop(LiveInterval &li, MachineLoop &loop); - - void processInterval(LiveInterval &li); - - void processIntervals(); - }; - -} - -#endif diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 1f0e5a2711ae..43a6ad8c97a4 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast(AI->getAllocatedType())) { - // We apparently only care about character arrays. - if (!AT->getElementType()->isIntegerTy(8)) - continue; - + if (ArrayType *AT = dyn_cast(AI->getAllocatedType())) // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. 
if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; - } } } diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 57cbe1ba5960..1e940b1d0711 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackcoloring" -#include "VirtRegMap.h" #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" @@ -40,29 +39,17 @@ DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, cl::desc("Suppress slot sharing during stack coloring")); -static cl::opt -ColorWithRegsOpt("color-ss-with-regs", - cl::init(false), cl::Hidden, - cl::desc("Color stack slots with free registers")); - - static cl::opt DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden); STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); -STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs"); -STATISTIC(NumLoadElim, "Number of loads eliminated"); -STATISTIC(NumStoreElim, "Number of stores eliminated"); STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { bool ColorWithRegs; LiveStacks* LS; - VirtRegMap* VRM; MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; const MachineLoopInfo *loopInfo; // SSIntervals - Spill slot intervals. @@ -98,18 +85,12 @@ namespace { MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } - StackSlotColoring(bool RegColor) : - MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) { - initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); - } - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addPreservedID(MachineDominatorsID); @@ -117,9 +98,6 @@ namespace { } virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char* getPassName() const { - return "Stack Slot Coloring"; - } private: void InitializeSlots(); @@ -127,41 +105,23 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - bool ColorSlotsWithFreeRegs(SmallVector &SlotMapping, - SmallVector, 16> &RevMap, - BitVector &SlotIsReg); void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF); - bool PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - bool PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, const TargetRegisterClass *RC, - SmallSet &Defs, - MachineFunction &MF); - bool AllMemRefsCanBeUnfolded(int SS); bool RemoveDeadStores(MachineBasicBlock* MBB); }; } // end anonymous namespace char StackSlotColoring::ID = 0; +char &llvm::StackSlotColoringID = StackSlotColoring::ID; INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", "Stack Slot Coloring", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveStacks) 
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", "Stack Slot Coloring", false, false) -FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { - return new StackSlotColoring(RegColor); -} - namespace { // IntervalSorter - Comparison predicate that sort live intervals by // their weight. @@ -248,79 +208,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { return false; } -/// ColorSlotsWithFreeRegs - If there are any free registers available, try -/// replacing spill slots references with registers instead. -bool -StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, - SmallVector, 16> &RevMap, - BitVector &SlotIsReg) { - if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) - return false; - - bool Changed = false; - DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); - if (!UsedColors[SS] || li->weight < 20) - // If the weight is < 20, i.e. two references in a loop with depth 1, - // don't bother with it. - continue; - - // These slots allow to share the same registers. - bool AllColored = true; - SmallVector ColoredRegs; - for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { - int RSS = RevMap[SS][j]; - const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); - // If it's not colored to another stack slot, try coloring it - // to a "free" register. - if (!RC) { - AllColored = false; - continue; - } - unsigned Reg = VRM->getFirstUnusedRegister(RC); - if (!Reg) { - AllColored = false; - continue; - } - if (!AllMemRefsCanBeUnfolded(RSS)) { - AllColored = false; - continue; - } else { - DEBUG(dbgs() << "Assigning fi#" << RSS << " to " - << TRI->getName(Reg) << '\n'); - ColoredRegs.push_back(Reg); - SlotMapping[RSS] = Reg; - SlotIsReg.set(RSS); - Changed = true; - } - } - - // Register and its sub-registers are no longer free. - while (!ColoredRegs.empty()) { - unsigned Reg = ColoredRegs.back(); - ColoredRegs.pop_back(); - VRM->setRegisterUsed(Reg); - // If reg is a callee-saved register, it will have to be spilled in - // the prologue. - MRI->setPhysRegUsed(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - VRM->setRegisterUsed(*AS); - MRI->setPhysRegUsed(*AS); - } - } - // This spill slot is dead after the rewrites - if (AllColored) { - MFI->RemoveStackObject(SS); - ++NumEliminated; - } - } - DEBUG(dbgs() << '\n'); - - return Changed; -} - /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. /// int StackSlotColoring::ColorSlot(LiveInterval *li) { @@ -372,7 +259,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector SlotMapping(NumObjs, -1); SmallVector SlotWeights(NumObjs, 0.0); SmallVector, 16> RevMap(NumObjs); - BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); DEBUG(dbgs() << "Color spill slot intervals:\n"); @@ -404,31 +290,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << '\n'); #endif - // Can we "color" a stack slot with a unused register? - Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg); - if (!Changed) return false; // Rewrite all MO_FrameIndex operands. 
SmallVector, 4> NewDefs(MF.getNumBlockIDs()); for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { - bool isReg = SlotIsReg[SS]; int NewFI = SlotMapping[SS]; - if (NewFI == -1 || (NewFI == (int)SS && !isReg)) + if (NewFI == -1 || (NewFI == (int)SS)) continue; - const TargetRegisterClass *RC = LS->getIntervalRegClass(SS); SmallVector &RefMIs = SSRefs[SS]; for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - if (!isReg) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); - else { - // Rewrite to use a register instead. - unsigned MBBId = RefMIs[i]->getParent()->getNumber(); - SmallSet &Defs = NewDefs[MBBId]; - UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF); - } + RewriteInstruction(RefMIs[i], SS, NewFI, MF); } // Delete unused stack slots. @@ -441,28 +315,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { return true; } -/// AllMemRefsCanBeUnfolded - Return true if all references of the specified -/// spill slot index can be unfolded. -bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { - SmallVector &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) { - MachineInstr *MI = RefMIs[i]; - if (TII->isLoadFromStackSlot(MI, SS) || - TII->isStoreToStackSlot(MI, SS)) - // Restore and spill will become copies. - return true; - if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false)) - return false; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (MO.isFI() && MO.getIndex() != SS) - // If it uses another frameindex, we can, currently* unfold it. - return false; - } - } - return true; -} - /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, @@ -489,179 +341,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, (*I)->setValue(NewSV); } -/// PropagateBackward - Traverse backward and look for the definition of -/// OldReg. If it can successfully update all of the references with NewReg, -/// do so and return true. -bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->begin()) - return false; - - SmallVector Uses; - SmallVector Refs; - while (--MII != MBB->begin()) { - bool FoundDef = false; // Not counting 2address def. - - Uses.clear(); - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register def. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isSubregToReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - - if (MO.isUse()) { - Uses.push_back(&MO); - } else { - Refs.push_back(&MO); - if (!MII->isRegTiedToUseOperand(i)) - FoundDef = true; - } - } else if (TRI->regsOverlap(Reg, NewReg)) { - return false; - } else if (TRI->regsOverlap(Reg, OldReg)) { - if (!MO.isUse() || !MO.isKill()) - return false; - } - } - - if (FoundDef) { - // Found non-two-address def. Stop here. 
- for (unsigned i = 0, e = Refs.size(); i != e; ++i) - Refs[i]->setReg(NewReg); - return true; - } - - // Two-address uses must be updated as well. - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Refs.push_back(Uses[i]); - } - return false; -} - -/// PropagateForward - Traverse forward and look for the kill of OldReg. If -/// it can successfully update all of the uses with NewReg, do so and -/// return true. -bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->end()) - return false; - - SmallVector Uses; - while (++MII != MBB->end()) { - bool FoundKill = false; - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isDef() || MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register use. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - if (MO.isKill()) - FoundKill = true; - - Uses.push_back(&MO); - } else if (TRI->regsOverlap(Reg, NewReg) || - TRI->regsOverlap(Reg, OldReg)) - return false; - } - if (FoundKill) { - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Uses[i]->setReg(NewReg); - return true; - } - } - return false; -} - -/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding -/// folded memory references and replacing those references with register -/// references instead. -void -StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, - const TargetRegisterClass *RC, - SmallSet &Defs, - MachineFunction &MF) { - MachineBasicBlock *MBB = MI->getParent(); - if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { - if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(dbgs() << "Eliminated load: "); - DEBUG(MI->dump()); - ++NumLoadElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), - DstReg).addReg(Reg); - ++NumRegRepl; - } - - if (!Defs.count(Reg)) { - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { - if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(dbgs() << "Eliminated store: "); - DEBUG(MI->dump()); - ++NumStoreElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(SrcReg); - ++NumRegRepl; - } - - // Remember reg has been defined in MBB. - Defs.insert(Reg); - } else { - SmallVector NewMIs; - bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - (void)Success; // Silence compiler warning. - assert(Success && "Failed to unfold!"); - MachineInstr *NewMI = NewMIs[0]; - MBB->insert(MI, NewMI); - ++NumRegRepl; - - if (NewMI->readsRegister(Reg)) { - if (!Defs.count(Reg)) - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } - MBB->erase(MI); -} /// RemoveDeadStores - Scan through a basic block and look for loads followed /// by stores. 
If they're both using the same stack slot, then the store is @@ -679,33 +358,33 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { I != E; ++I) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - + int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; - + ++NumDead; changed = true; - + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { ++NumDead; toErase.push_back(I); } - + toErase.push_back(NextMI); ++I; } - + for (SmallVector::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); - + return changed; } @@ -713,32 +392,27 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " + << "********** Function: " << MF.getFunction()->getName() << '\n'; }); MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); LS = &getAnalysis(); - VRM = &getAnalysis(); loopInfo = &getAnalysis(); bool Changed = false; unsigned NumSlots = LS->getNumIntervals(); - if (NumSlots < 2) { - if (NumSlots == 0 || !VRM->HasUnusedRegisters()) - // Nothing to do! - return false; - } + if (NumSlots == 0) + // Nothing to do! + return false; // If there are calls to setjmp or sigsetjmp, don't perform stack slot // coloring. The stack could be modified before the longjmp is executed, // resulting in the wrong value being used afterwards. (See // .) - if (MF.callsSetJmp()) + if (MF.exposesReturnsTwice()) return false; // Gather spill slot references diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp index 260cc0ee50a5..c6fdc7382435 100644 --- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp @@ -228,7 +228,6 @@ static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { return &MO; } } - return NULL; } bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { @@ -390,12 +389,10 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { MachineOperand *LastUse = findLastUse(MBB, SrcReg); assert(LastUse); SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); LastUse->setIsKill(true); } - LI->renumber(); - Allocator.Reset(); RegNodeMap.clear(); PHISrcDefs.clear(); @@ -745,7 +742,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, // Set the phi-def flag for the VN at this PHI. 
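An aside on the StackSlotColoring change just above: the pass still refuses to share stack slots in functions containing setjmp-style calls, it merely asks the question through MF.exposesReturnsTwice() instead of the removed MF.callsSetJmp(). The standalone program below (illustrative only, not part of the patch; all names are hypothetical) shows the kind of control flow involved: the volatile Counter must still read 42 after the second return of setjmp(), so the slot it lives in cannot be silently reused or reshuffled.

#include <setjmp.h>
#include <stdio.h>

static jmp_buf Env;

static void MayLongJmp(void) {
  longjmp(Env, 1);             /* control reappears at the setjmp() below */
}

int main(void) {
  volatile int Counter = 0;    /* volatile keeps the value defined across longjmp */
  if (setjmp(Env) == 0) {
    Counter = 42;              /* stack slot written before the longjmp */
    MayLongJmp();
  }
  printf("%d\n", Counter);     /* must still observe 42 after the second return */
  return 0;
}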
SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); DestVNI->setIsPHIDef(true); @@ -756,7 +753,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); DestVNI->def = MBBStartIndex; DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getDefIndex(), + PHIIndex.getRegSlot(), DestVNI)); return; } @@ -779,22 +776,21 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, - CopyInstr, LI->getVNInfoAllocator()); CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getDefIndex(), + DestCopyIndex.getRegSlot(), CopyVNI)); // Adjust DestReg's live interval to adjust for its new definition at // CopyInstr. LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI); - DestVNI->def = DestCopyIndex.getDefIndex(); + DestVNI->def = DestCopyIndex.getRegSlot(); InsertedDestCopies[CopyReg] = CopyInstr; } diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 3a6211a0f3e6..8ebfbcae785b 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -56,10 +56,10 @@ typedef std::vector > AvailableValsTy; namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { - bool PreRegAlloc; const TargetInstrInfo *TII; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; + bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 
SmallVector SSAUpdateVRs; @@ -70,11 +70,10 @@ namespace { public: static char ID; - explicit TailDuplicatePass(bool PreRA) : - MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + explicit TailDuplicatePass() : + MachineFunctionPass(ID), PreRegAlloc(false) {} virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Tail Duplication"; } private: void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, @@ -118,14 +117,16 @@ namespace { char TailDuplicatePass::ID = 0; } -FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { - return new TailDuplicatePass(PreRegAlloc); -} +char &llvm::TailDuplicateID = TailDuplicatePass::ID; + +INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", + false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable(); + PreRegAlloc = MRI->isSSA(); bool MadeChange = false; while (TailDuplicateBlocks(MF)) @@ -432,7 +433,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MO.setReg(VI->second); } } - PredBB->insert(PredBB->end(), NewMI); + PredBB->insert(PredBB->instr_end(), NewMI); } /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor @@ -553,7 +554,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool HasIndirectbr = false; if (!TailBB.empty()) - HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = 20; @@ -561,22 +562,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); - ++I) { + for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) + if (I->isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) + if (PreRegAlloc && I->isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->getDesc().isCall()) + if (PreRegAlloc && I->isCall()) return false; if (!I->isPHI() && !I->isDebugValue()) @@ -611,7 +611,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { ++I; if (I == E) return true; - return I->getDesc().isUnconditionalBranch(); + return I->isUnconditionalBranch(); } static bool @@ -778,8 +778,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Clone the contents of TailBB into PredBB. DenseMap LocalVRMap; SmallVector, 4> CopyInfos; - MachineBasicBlock::iterator I = TailBB->begin(); - while (I != TailBB->end()) { + // Use instr_iterator here to properly handle bundles, e.g. + // ARM Thumb2 IT block. 
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); + while (I != TailBB->instr_end()) { MachineInstr *MI = &*I; ++I; if (MI->isPHI()) { @@ -824,7 +826,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, SmallVector PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. - if (PrevBB->succ_size() == 1 && + if (PrevBB->succ_size() == 1 && !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && !TailBB->hasAddressTaken()) { @@ -849,6 +851,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); MI->eraseFromParent(); } diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp similarity index 95% rename from contrib/llvm/lib/Target/TargetFrameLowering.cpp rename to contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 122f8696e2ce..cadb87815dbe 100644 --- a/contrib/llvm/lib/Target/TargetFrameLowering.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -1,4 +1,4 @@ -//===----- TargetFrameLowering.cpp - Implement target frame interface ------==// +//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp index f32678f12b0a..2beb9281e35b 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -77,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? 
MI->getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); // If destination is tied to either of the commuted source register, then @@ -85,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { Reg2IsKill = false; Reg0 = Reg2; + SubReg0 = SubReg2; } else if (HasDef && Reg0 == Reg2 && MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { Reg1IsKill = false; Reg0 = Reg1; + SubReg0 = SubReg1; } if (NewMI) { @@ -97,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MachineFunction &MF = *MI->getParent()->getParent(); if (HasDef) return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); else return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); } - if (HasDef) + if (HasDef) { MI->getOperand(0).setReg(Reg0); + MI->getOperand(0).setSubReg(SubReg0); + } MI->getOperand(Idx2).setReg(Reg1); MI->getOperand(Idx1).setReg(Reg2); + MI->getOperand(Idx2).setSubReg(SubReg1); + MI->getOperand(Idx1).setSubReg(SubReg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); MI->getOperand(Idx1).setIsKill(Reg2IsKill); return MI; @@ -121,6 +131,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { + assert(!MI->isBundle() && + "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; @@ -136,11 +149,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, } +bool +TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) return false; + + // Conditional branch is a special case. 
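An aside on the commuteInstruction() changes earlier in this TargetInstrInfoImpl.cpp hunk: the commuted operands now carry their sub-register indices (SubReg0/SubReg1/SubReg2) along with the registers, and Reg1's kill flag is now taken from Reg1IsKill instead of reusing Reg2IsKill. A minimal standalone sketch of the invariant, with invented types that merely stand in for machine operands:

#include <utility>

struct Operand {
  unsigned Reg;
  unsigned SubReg;   // 0 means "no sub-register"
  bool IsKill;
};

// Swap two commutable source operands.  Swapping only Reg would drop each
// operand's sub-register qualifier and leave the kill flags attached to the
// wrong registers -- the three fields have to travel together.
void commuteSources(Operand &A, Operand &B) {
  std::swap(A.Reg, B.Reg);
  std::swap(A.SubReg, B.SubReg);
  std::swap(A.IsKill, B.IsKill);
}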
+ if (MI->isBranch() && !MI->isBarrier()) + return true; + if (!MI->isPredicable()) + return true; + return !isPredicated(MI); +} + + bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl &Pred) const { bool MadeChange = false; + + assert(!MI->isBundle() && + "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -218,7 +248,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, MachineFunction &MF) const { - assert(!Orig->getDesc().isNotDuplicable() && + assert(!Orig->isNotDuplicable() && "Instruction cannot be duplicated"); return MF.CloneMachineInstr(Orig); } @@ -288,16 +318,15 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->getDesc().mayStore()) && + NewMI->mayStore()) && "Folded a def to a non-store!"); assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->getDesc().mayLoad()) && + NewMI->mayLoad()) && "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -332,7 +361,7 @@ MachineInstr* TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl &Ops, MachineInstr* LoadMI) const { - assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); @@ -360,7 +389,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetMachine &TM = MF.getTarget(); const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); // Remat clients assume operand 0 is the defined register. if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) @@ -383,10 +411,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const MCInstrDesc &MCID = MI->getDesc(); - // Avoid instructions obviously unsafe for remat. - if (MCID.isNotDuplicable() || MCID.mayStore() || + if (MI->isNotDuplicable() || MI->mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -396,7 +422,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. - if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume @@ -414,19 +440,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. 
Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI.def_empty(Reg)) + if (!MRI.isConstantPhysReg(Reg, MF)) return false; - BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0); - if (AllocatableRegs.test(Reg)) - return false; - // Check for a def among the register's aliases too. - for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - if (!MRI.def_empty(AliasReg)) - return false; - if (AllocatableRegs.test(AliasReg)) - return false; - } } else { // A physreg def. We can't remat it. return false; @@ -457,7 +472,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const{ // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Don't attempt to schedule around any instruction that defines @@ -493,3 +508,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return (ScheduleHazardRecognizer *) new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } + +int +TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3848f4d4d4c4..9925185be120 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" +#include "llvm/Module.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -53,11 +54,9 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: return Mang->getSymbol(GV); - break; case dwarf::DW_EH_PE_pcrel: { return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + Mang->getSymbol(GV)->getName()); - break; } } } @@ -78,14 +77,14 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); + unsigned Size = TM.getTargetData()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(8); + Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); - const MCExpr *E = MCConstantExpr::Create(8, getContext()); + const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); Streamer.EmitLabel(Label); - 
unsigned Size = TM.getTargetData()->getPointerSize(); Streamer.EmitSymbolValue(Sym, Size); } @@ -189,6 +188,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, static const char *getSectionPrefixForGlobal(SectionKind Kind) { if (Kind.isText()) return ".text."; if (Kind.isReadOnly()) return ".rodata."; + if (Kind.isBSS()) return ".bss."; if (Kind.isThreadData()) return ".tdata."; if (Kind.isThreadBSS()) return ".tbss."; @@ -217,7 +217,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. if ((GV->isWeakForLinker() || EmitUniquedSection) && - !Kind.isCommon() && !Kind.isBSS()) { + !Kind.isCommon()) { const char *Prefix; Prefix = getSectionPrefixForGlobal(Kind); @@ -342,10 +342,92 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +const MCSection * +TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticCtorSection; + + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (Priority == 65535) + return StaticDtorSection; + + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + //===----------------------------------------------------------------------===// // MachO //===----------------------------------------------------------------------===// +/// emitModuleFlags - Emit the module flags that specify the garbage collection +/// information. +void TargetLoweringObjectFileMachO:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + unsigned VersionVal = 0; + unsigned GCFlags = 0; + StringRef SectionVal; + + for (ArrayRef::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + + // Ignore flags with 'Require' behavior. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + + if (Key == "Objective-C Image Info Version") + VersionVal = cast(Val)->getZExtValue(); + else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only") + GCFlags |= cast(Val)->getZExtValue(); + else if (Key == "Objective-C Image Info Section") + SectionVal = cast(Val)->getString(); + } + + // The section is mandatory. If we don't have it, then we don't have GC info. + if (SectionVal.empty()) return; + + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) + // If invalid, report the error with report_fatal_error. + report_fatal_error("Invalid section specifier '" + Section + "': " + + ErrorCode + "."); + + // Get the section. 
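An aside on the getStaticCtorSection()/getStaticDtorSection() overrides added above: a constructor with the default priority (65535) stays in the plain static-ctor section, while any other priority gets a dedicated section whose numeric suffix is 65535 minus the priority, i.e. the numbering is inverted to match the traditional .ctors/.dtors scheme. A standalone sketch of the naming rule (illustrative; ctorSectionFor is an invented helper, and the real code returns an MCSection rather than a string):

#include <cstdio>
#include <string>

std::string ctorSectionFor(unsigned Priority) {
  if (Priority == 65535)
    return ".ctors";                                     // default priority
  return ".ctors." + std::to_string(65535 - Priority);   // inverted numbering
}

int main() {
  std::printf("%s\n", ctorSectionFor(65535).c_str());    // .ctors
  std::printf("%s\n", ctorSectionFor(101).c_str());      // .ctors.65434
  return 0;
}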
+ const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, + SectionKind::getDataNoRel()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(getContext(). + GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(VersionVal, 4); + Streamer.EmitIntValue(GCFlags, 4); + Streamer.AddBlankLine(); +} + const MCSection *TargetLoweringObjectFileMachO:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { @@ -358,11 +440,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; + report_fatal_error("Global variable '" + GV->getName() + + "' has an invalid section specifier '" + + GV->getSection() + "': " + ErrorCode + "."); } // Get the section. @@ -379,9 +459,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); + report_fatal_error("Global variable '" + GV->getName() + + "' section type or attributes does not match previous" + " section specifier"); } return S; @@ -536,9 +616,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : - MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); @@ -568,6 +646,11 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isThreadLocal()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -594,6 +677,8 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; + if (Kind.isThreadLocal()) + return ".tls$"; if (Kind.isWriteable()) return ".data$"; return ".rdata$"; @@ -603,7 +688,6 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { const MCSection *TargetLoweringObjectFileCOFF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. 
@@ -624,6 +708,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isText()) return getTextSection(); + if (Kind.isThreadLocal()) + return getTLSDataSection(); + return getDataSection(); } diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp new file mode 100644 index 000000000000..0f59d0169e18 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -0,0 +1,52 @@ +//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods in the TargetOptions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +/// DisableFramePointerElim - This returns true if frame pointer elimination +/// optimization should be disabled for the given machine function. +bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. + if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + + return NoFramePointerElim; +} + +/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option +/// is specified on the command line. When this flag is off(default), the +/// code generator is not allowed to generate mad (multiply add) if the +/// result is "less precise" than doing those operations individually. +bool TargetOptions::LessPreciseFPMAD() const { + return UnsafeFPMath || LessPreciseFPMADOption; +} + +/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume +/// that the rounding mode of the FPU can change from its default. +bool TargetOptions::HonorSignDependentRoundingFPMath() const { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; +} + +/// getTrapFunctionName - If this returns a non-empty string, this means isel +/// should lower Intrinsic::trap to a call to the specified function name +/// instead of an ISD::TRAP node. 
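An aside on the new TargetOptionsImpl.cpp above: DisableFramePointerElim() keeps the frame pointer in any function that makes calls when only NoFramePointerElimNonLeaf is set, and otherwise falls back to the global NoFramePointerElim flag; LessPreciseFPMAD() and HonorSignDependentRoundingFPMath() are likewise simple combinations of UnsafeFPMath with their respective options. A standalone restatement of the frame-pointer decision (struct, field and function names here are illustrative, not the LLVM API):

struct FPOptions {
  bool NoFramePointerElim;         // keep the frame pointer everywhere
  bool NoFramePointerElimNonLeaf;  // keep it only in functions that make calls
};

// HasCalls stands in for MachineFrameInfo::hasCalls() on the current function.
bool disableFPElim(const FPOptions &Opt, bool HasCalls) {
  if (Opt.NoFramePointerElimNonLeaf && !Opt.NoFramePointerElim)
    return HasCalls;
  return Opt.NoFramePointerElim;
}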
+StringRef TargetOptions::getTrapFunctionName() const { + return TrapFuncName; +} + diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index d87937822280..c30b1333bb2a 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -56,14 +57,18 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReMats, "Number of instructions re-materialized"); STATISTIC(NumDeletes, "Number of dead instructions deleted"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. @@ -120,6 +125,18 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned Dist); + bool isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, MachineBasicBlock *MBB); + + bool RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, @@ -152,7 +169,6 @@ namespace { AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -225,12 +241,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->getDesc().isTerminator()) + KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the // position and the kill use, then it's not safe to sink it. - // + // // FIXME: This can be sped up if there is an easy way to query whether an // instruction is before or after another instruction. Then we can use // MachineRegisterInfo def / use instead. @@ -273,7 +289,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, KillMO->setIsKill(false); KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); KillMO->setIsKill(true); - + if (LV) LV->replaceKillInstruction(SavedReg, KillMI, MI); @@ -319,7 +335,7 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, continue; // Current use. OtherUse = true; // There is at least one other use in the MBB that will clobber the - // register. + // register. 
if (isTwoAddrUse(UseMI, Reg)) return true; } @@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } +/// findLocalKill - Look for an instruction below MI in the MBB that kills the +/// specified register. Returns null if there are any other Reg use between the +/// instructions. +static +MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, + MachineInstr *MI, MachineRegisterInfo *MRI, + DenseMap &DistanceMap) { + MachineInstr *KillMI = 0; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI == MI || UseMI->getParent() != MBB) + continue; + if (DistanceMap.count(UseMI)) + continue; + if (!UI.getOperand().isKill()) + return 0; + if (KillMI) + return 0; // -O0 kill markers cannot be trusted? + KillMI = UseMI; + } + + return KillMI; +} + /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. static @@ -528,6 +570,9 @@ bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { + if (OptLevel == CodeGenOpt::None) + return false; + // Determine if it's profitable to commute this two address instruction. In // general, we want no uses between this instruction and the definition of // the two-address register. @@ -544,7 +589,7 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, // %reg1029 = MOV8rr %reg1028 // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS // insert => %reg1030 = MOV8rr %reg1029 - // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS + // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS if (!MI->killsRegister(regC)) return false; @@ -770,10 +815,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector &Kills) { - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall()) + if (MI->mayStore() || MI->isCall()) return false; - if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -852,28 +896,316 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, return true; } +/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill +/// instruction in order to eliminate the need for the copy. +bool +TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() || + KillMI->isBranch() || KillMI->isTerminator()) + // Don't move pass calls, etc. 
+ return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + if (TII->getInstrLatency(InstrItins, MI) > 1) + // FIXME: Needs more sophisticated heuristics. + return false; + + SmallSet Uses; + SmallSet Kills; + SmallSet Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) + Defs.insert(MOReg); + else { + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } + } + + // Move the copies connected to MI down as well. + MachineBasicBlock::iterator From = MI; + MachineBasicBlock::iterator To = llvm::next(From); + while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) { + Defs.insert(To->getOperand(0).getReg()); + ++To; + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) { + if (Uses.count(MOReg)) + // Physical register use would be clobbered. + return false; + if (!MO.isDead() && Defs.count(MOReg)) + // May clobber a physical register def. + // FIXME: This may be too conservative. It's ok if the instruction + // is sunken completely below the use. + return false; + } else { + if (Defs.count(MOReg)) + return false; + if (MOReg != Reg && + ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) + // Don't want to extend other live ranges and update kills. + return false; + } + } + } + + // Move debug info as well. + while (From != MBB->begin() && llvm::prior(From)->isDebugValue()) + --From; + + // Copies following MI may have been moved as well. + nmi = To; + MBB->splice(KillPos, MBB, From, To); + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + + return true; +} + +/// isDefTooClose - Return true if the re-scheduling will put the given +/// instruction too close to the defs of its register dependencies. 
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, + MachineBasicBlock *MBB) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { + MachineInstr *DefMI = &*DI; + if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + continue; + if (DefMI == MI) + return true; // MI is defining something KillMI uses + DenseMap::iterator DDI = DistanceMap.find(DefMI); + if (DDI == DistanceMap.end()) + return true; // Below MI + unsigned DefDist = DDI->second; + assert(Dist > DefDist && "Visited def already?"); + if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + return true; + } + return false; +} + +/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the +/// current two-address instruction in order to eliminate the need for the +/// copy. +bool +TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + return false; + + SmallSet Uses; + SmallSet Kills; + SmallSet Defs; + SmallSet LiveDefs; + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!MOReg) + continue; + if (isDefTooClose(MOReg, DI->second, MI, MBB)) + return false; + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Defs.insert(MOReg); + if (!MO.isDead()) + LiveDefs.insert(MOReg); + } + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + SmallVector OtherDefs; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse()) { + if (Defs.count(MOReg)) + // Moving KillMI can clobber the physical register if the def has + // not been seen. + return false; + if (Kills.count(MOReg)) + // Don't want to extend other live ranges and update kills. 
+ return false; + } else { + OtherDefs.push_back(MOReg); + } + } + + for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { + unsigned MOReg = OtherDefs[i]; + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } + } + + // Move the old kill above MI, don't forget to move debug info as well. + MachineBasicBlock::iterator InsertPos = mi; + while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + --InsertPos; + MachineBasicBlock::iterator From = KillMI; + MachineBasicBlock::iterator To = llvm::next(From); + while (llvm::prior(From)->isDebugValue()) + --From; + MBB->splice(InsertPos, MBB, From, To); + + nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + return true; +} + /// TryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if the tied operands -/// are eliminated altogether. +/// coalescing away the register copy. Returns true if no copy needs to be +/// inserted to untie mi's operands (either because they were untied, or +/// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet &Processed) { - const MCInstrDesc &MCID = mi->getDesc(); - unsigned regA = mi->getOperand(DstIdx).getReg(); - unsigned regB = mi->getOperand(SrcIdx).getReg(); + if (OptLevel == CodeGenOpt::None) + return false; + + MachineInstr &MI = *mi; + unsigned regA = MI.getOperand(DstIdx).getReg(); + unsigned regB = MI.getOperand(SrcIdx).getReg(); assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); // If regA is dead and the instruction can be deleted, just delete // it so it doesn't clobber regB. - bool regBKilled = isKilled(*mi, regB, MRI, TII); - if (!regBKilled && mi->getOperand(DstIdx).isDead() && + bool regBKilled = isKilled(MI, regB, MRI, TII); + if (!regBKilled && MI.getOperand(DstIdx).isDead() && DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { ++NumDeletes; return true; // Done with this instruction. 
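The two rescheduling transforms added above, RescheduleMIBelowKill and RescheduleKillAboveMI, both reduce to the same legality question: may an instruction be moved past a span of neighbours without reordering a def/use pair and without crossing a call, branch, or other instruction with unmodeled side effects? The standalone C++ sketch below models only that core check under simplified assumptions; ToyInstr and canMoveAcross are illustrative names, not LLVM API, and the real pass additionally consults kill flags, instruction latency (isDefTooClose) and physical-register liveness.

// Illustrative sketch only: a toy model of the dependence check performed when
// rescheduling an instruction past its neighbours.  Not LLVM code.
#include <cstdio>
#include <set>
#include <vector>

struct ToyInstr {
  std::set<unsigned> Defs;  // register ids written by this instruction
  std::set<unsigned> Uses;  // register ids read by this instruction
  bool HasSideEffects;      // stands in for calls, branches, stores, ...
};

// Return true if Candidate may be moved past Block[Begin..End) without
// reordering a def/use pair or crossing an instruction with side effects.
static bool canMoveAcross(const ToyInstr &Candidate,
                          const std::vector<ToyInstr> &Block,
                          size_t Begin, size_t End) {
  for (size_t I = Begin; I < End; ++I) {
    const ToyInstr &Other = Block[I];
    if (Other.HasSideEffects)
      return false;  // don't move past calls, etc.
    for (unsigned R : Other.Defs)
      if (Candidate.Uses.count(R) || Candidate.Defs.count(R))
        return false;  // Other rewrites something Candidate touches
    for (unsigned R : Other.Uses)
      if (Candidate.Defs.count(R))
        return false;  // Other reads a value Candidate produces
  }
  return true;
}

int main() {
  // r1 = f(r0); r2 = g(r3); r4 = h(r1, r2)
  std::vector<ToyInstr> Block = {
      {{1}, {0}, false}, {{2}, {3}, false}, {{4}, {1, 2}, false}};
  // Instruction 0 may move below instruction 1 (no registers in common)...
  std::printf("past #1: %s\n", canMoveAcross(Block[0], Block, 1, 2) ? "ok" : "no");
  // ...but not below instruction 2, which reads r1.
  std::printf("past #2: %s\n", canMoveAcross(Block[0], Block, 1, 3) ? "ok" : "no");
  return 0;
}

In the pass itself the span is bounded by the local kill of the tied source register (located by the new findLocalKill), and a successful move places the two-address instruction next to that kill, so the copy that would otherwise be inserted to untie its operands can later be coalesced away.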
@@ -885,20 +1217,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (MCID.isCommutable() && mi->getNumOperands() >= 3 && - TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (MI.isCommutable() && MI.getNumOperands() >= 3 && + TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; else if (SrcIdx == SrcOp2) regCIdx = SrcOp1; if (regCIdx != ~0U) { - regC = mi->getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + regC = MI.getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(MI, regC, MRI, TII)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -913,10 +1245,17 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. + if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (MCID.isConvertibleTo3Addr()) { + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -928,6 +1267,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + // If this is an instruction with a load folded into it, try unfolding // the load, e.g. avoid this: // movq %rdx, %rcx @@ -936,11 +1282,11 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (MCID.mayLoad() && !regBKilled) { + if (MI.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = - TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); @@ -950,12 +1296,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction &MF = *mbbi->getParent(); // Unfold the load. - DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; - if (!TII->unfoldMemoryOperand(MF, mi, Reg, + if (!TII->unfoldMemoryOperand(MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -986,21 +1332,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. 
if (LV) { - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); else { assert(NewMIs[1]->killsRegister(MO.getReg()) && "Kill missing after load unfold!"); - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); } } - } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { if (NewMIs[1]->registerDefIsDead(MO.getReg())) LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); else { @@ -1013,7 +1359,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } - mi->eraseFromParent(); + MI.eraseFromParent(); mi = NewMIs[1]; if (TransformSuccess) return true; @@ -1035,18 +1381,19 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + InstrItins = TM.getInstrItineraryData(); LV = getAnalysisIfAvailable(); AA = &getAnalysis(); + OptLevel = TM.getOptLevel(); bool MadeChange = false; DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); - DEBUG(dbgs() << "********** Function: " + DEBUG(dbgs() << "********** Function: " << MF.getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. @@ -1177,7 +1524,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && - DefMI->getDesc().isAsCheapAsAMove() && + DefMI->isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, AA, regB) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); @@ -1248,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); - } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+ if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } } // Clear TiedOperands here instead of at the top of the loop @@ -1298,6 +1645,36 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg, } } +// Find the first def of Reg, assuming they are all in the same basic block. +static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { + SmallPtrSet Defs; + MachineInstr *First = 0; + for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg); + MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI)) + First = MI; + if (!First) + return 0; + + MachineBasicBlock *MBB = First->getParent(); + MachineBasicBlock::iterator A = First, B = First; + bool Moving; + do { + Moving = false; + if (A != MBB->begin()) { + Moving = true; + --A; + if (Defs.erase(A)) First = A; + } + if (B != MBB->end()) { + Defs.erase(B); + ++B; + Moving = true; + } + } while (Moving && !Defs.empty()); + assert(Defs.empty() && "Instructions outside basic block!"); + return First; +} + /// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are /// EXTRACT_SUBREG from the same register and to the same virtual register /// with different sub-register indices, attempt to combine the @@ -1380,8 +1757,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, CanCoalesce = false; break; } - // Keep track of one of the uses. - SomeMI = UseMI; + // Keep track of one of the uses. Preferably the first one which has a + // flag. + if (!SomeMI || UseMI->getOperand(0).isUndef()) + SomeMI = UseMI; } if (!CanCoalesce) continue; @@ -1390,7 +1769,9 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, SomeMI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(DstReg, RegState::Define | + getUndefRegState(SomeMI->getOperand(0).isUndef()), + NewDstSubIdx) .addReg(SrcReg, 0, NewSrcSubIdx); // Remove all the old extract instructions. @@ -1452,26 +1833,30 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallSet Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); - if (MI->getOperand(i).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); - llvm_unreachable(0); + // DefMI of NULL means the value does not have a vreg in this block + // i.e., its a physical register or a subreg. + // In either case we force a copy to be generated. + MachineInstr *DefMI = NULL; + if (!MI->getOperand(i).getSubReg() && + !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); } - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) { + if (DefMI && DefMI->isImplicitDef()) { DefMI->eraseFromParent(); continue; } IsImpDef = false; // Remember COPY sources. These might be candidate for coalescing. 
- if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) + if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) RealSrcs.push_back(DefMI->getOperand(1).getReg()); bool isKill = MI->getOperand(i).isKill(); - if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + if (!DefMI || !Seen.insert(SrcReg) || + MI->getParent() != DefMI->getParent() || !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), MRI->getRegClass(SrcReg), SubIdx)) { @@ -1504,9 +1889,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addReg(DstReg, RegState::Define, SubIdx) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx); MI->getOperand(i).setReg(0); - if (LV && isKill) + if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, CopyMI); DEBUG(dbgs() << "Inserted: " << *CopyMI); } @@ -1519,11 +1904,27 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } + // Set flags on the first DstReg def in the basic block. + // It marks the beginning of the live range. All the other defs are + // read-modify-write. + if (MachineInstr *Def = findFirstDef(DstReg, MRI)) { + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + MachineOperand &MO = Def->getOperand(i); + if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) + MO.setIsUndef(); + } + // Make sure there is a full non-subreg imp-def operand on the + // instruction. This shouldn't be necessary, but it seems that at least + // RAFast requires it. + Def->addRegisterDefined(DstReg, TRI); + DEBUG(dbgs() << "First def: " << *Def); + } + if (IsImpDef) { DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); + MI->RemoveOperand(j); } else { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 8a1cdc01c494..3bab93bdc098 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -16,10 +16,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "virtregmap" +#define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,12 +31,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include using namespace llvm; @@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { TRI = mf.getTarget().getRegisterInfo(); MF = &mf; - ReMatId = MAX_STACK_SLOT+1; - LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; - Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); - Virt2ReMatIdMap.clear(); Virt2SplitMap.clear(); - Virt2SplitKillMap.clear(); - ReMatMap.clear(); - ImplicitDefed.clear(); - 
SpillSlotToUsesMap.clear(); - MI2VirtMap.clear(); - SpillPt2VirtMap.clear(); - RestorePt2VirtMap.clear(); - EmergencySpillMap.clear(); - EmergencySpillSlots.clear(); - - SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); - - allocatableRCRegs.clear(); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - allocatableRCRegs.insert(std::make_pair(*I, - TRI->getAllocatableSet(mf, *I))); grow(); - return false; } @@ -93,24 +65,12 @@ void VirtRegMap::grow() { unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); Virt2PhysMap.resize(NumRegs); Virt2StackSlotMap.resize(NumRegs); - Virt2ReMatIdMap.resize(NumRegs); Virt2SplitMap.resize(NumRegs); - Virt2SplitKillMap.resize(NumRegs); - ReMatMap.resize(NumRegs); - ImplicitDefed.resize(NumRegs); } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - assert(SS >= LowSpillSlot && "Unexpected low spill slot"); - unsigned Idx = SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); ++NumSpillSlots; return SS; } @@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -int VirtRegMap::assignVirtReMatId(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = ReMatId; - return ReMatId++; -} - -void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = id; -} - -int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { - std::map::iterator I = - EmergencySpillSlots.find(RC); - if (I != EmergencySpillSlots.end()) - return I->second; - return EmergencySpillSlots[RC] = createSpillSlot(RC); -} - -void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { - if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { - // If FI < LowSpillSlot, this stack reference was produced by - // instruction selection and is not a spill - if (FI >= LowSpillSlot) { - assert(FI >= 0 && "Spill slot index should not be negative!"); - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); - } - } -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, - MachineInstr *NewMI, ModRef MRInfo) { - // Move previous memory references folded to new instruction. 
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); - for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), - E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { - MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); - MI2VirtMap.erase(I++); - } - - // add new memory reference - MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); - MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isFI()) - continue; - int FI = MO.getIndex(); - if (MF->getFrameInfo()->isFixedObjectIndex(FI)) - continue; - // This stack reference was produced by instruction selection and - // is not a spill - if (FI < LowSpillSlot) - continue; - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); - } - MI2VirtMap.erase(MI); - SpillPt2VirtMap.erase(MI); - RestorePt2VirtMap.erase(MI); - EmergencySpillMap.erase(MI); -} - -/// FindUnusedRegisters - Gather a list of allocatable registers that -/// have not been allocated to any virtual register. -bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { - unsigned NumRegs = TRI->getNumRegs(); - UnusedRegs.reset(); - UnusedRegs.resize(NumRegs); - - BitVector Used(NumRegs); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[Reg]); - } - - BitVector Allocatable = TRI->getAllocatableSet(*MF); - bool AnyUnused = false; - for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { - if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { - bool ReallyUnused = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - if (Used[*AS] || LIs->hasInterval(*AS)) { - ReallyUnused = false; - break; - } - } - if (ReallyUnused) { - AnyUnused = true; - UnusedRegs.set(Reg); - } - } - } - - return AnyUnused; -} - void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " @@ -264,23 +112,32 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { SmallVector SuperDeads; SmallVector SuperDefs; SmallVector SuperKills; +#ifndef NDEBUG + BitVector Reserved = TRI->getReservedRegs(*MF); +#endif for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); - MII != MIE;) { + for (MachineBasicBlock::instr_iterator + MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = MII; ++MII; for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; + + // Make sure MRI knows about registers clobbered by regmasks. 
+ if (MO.isRegMask()) + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); unsigned PhysReg = getPhys(VirtReg); assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + assert(!Reserved.test(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. if (MO.getSubReg()) { @@ -332,7 +189,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); if (Indexes) Indexes->removeMachineInstrFromMaps(MI); // It's safe to erase MI because MII has already been incremented. diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h index 03abff356934..8cac31137e3d 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.h +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h @@ -18,22 +18,14 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include namespace llvm { - class LiveIntervals; class MachineInstr; class MachineFunction; class MachineRegisterInfo; class TargetInstrInfo; - class TargetRegisterInfo; class raw_ostream; class SlotIndexes; @@ -45,18 +37,12 @@ namespace llvm { MAX_STACK_SLOT = (1L << 18)-1 }; - enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; - typedef std::multimap > MI2VirtMapTy; - private: MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; - DenseMap allocatableRCRegs; - /// Virt2PhysMap - This is a virtual to physical register /// mapping. Each virtual register is required to have an entry in /// it; even spilled virtual registers (the register mapped to a @@ -70,71 +56,10 @@ namespace llvm { /// at. IndexedMap Virt2StackSlotMap; - /// Virt2ReMatIdMap - This is virtual register to rematerialization id - /// mapping. Each spilled virtual register that should be remat'd has an - /// entry in it which corresponds to the remat id. - IndexedMap Virt2ReMatIdMap; - /// Virt2SplitMap - This is virtual register to splitted virtual register /// mapping. IndexedMap Virt2SplitMap; - /// Virt2SplitKillMap - This is splitted virtual register to its last use - /// (kill) index mapping. - IndexedMap Virt2SplitKillMap; - - /// ReMatMap - This is virtual register to re-materialized instruction - /// mapping. Each virtual register whose definition is going to be - /// re-materialized has an entry in it. - IndexedMap ReMatMap; - - /// MI2VirtMap - This is MachineInstr to virtual register - /// mapping. In the case of memory spill code being folded into - /// instructions, we need to know which virtual register was - /// read/written by this instruction. - MI2VirtMapTy MI2VirtMap; - - /// SpillPt2VirtMap - This records the virtual registers which should - /// be spilled right after the MachineInstr due to live interval - /// splitting. - std::map > > - SpillPt2VirtMap; - - /// RestorePt2VirtMap - This records the virtual registers which should - /// be restored right before the MachineInstr due to live interval - /// splitting. 
- std::map > RestorePt2VirtMap; - - /// EmergencySpillMap - This records the physical registers that should - /// be spilled / restored around the MachineInstr since the register - /// allocator has run out of registers. - std::map > EmergencySpillMap; - - /// EmergencySpillSlots - This records emergency spill slots used to - /// spill physical registers when the register allocator runs out of - /// registers. Ideally only one stack slot is used per function per - /// register class. - std::map EmergencySpillSlots; - - /// ReMatId - Instead of assigning a stack slot to a to be rematerialized - /// virtual register, an unique id is being assigned. This keeps track of - /// the highest id used so far. Note, this starts at (1<<18) to avoid - /// conflicts with stack slot numbers. - int ReMatId; - - /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. - int LowSpillSlot, HighSpillSlot; - - /// SpillSlotToUsesMap - Records uses for each register spill slot. - SmallVector, 8> SpillSlotToUsesMap; - - /// ImplicitDefed - One bit for each virtual register. If set it indicates - /// the register is implicitly defined. - BitVector ImplicitDefed; - - /// UnusedRegs - A list of physical registers that have not been used. - BitVector UnusedRegs; - /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -144,11 +69,7 @@ namespace llvm { public: static char ID; VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), - Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), - ReMatId(MAX_STACK_SLOT+1), - LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -235,8 +156,7 @@ namespace llvm { /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { - if (getStackSlot(virtReg) == NO_STACK_SLOT && - getReMatId(virtReg) == NO_STACK_SLOT) + if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. @@ -250,13 +170,6 @@ namespace llvm { return Virt2StackSlotMap[virtReg]; } - /// @brief returns the rematerialization id mapped to the specified virtual - /// register - int getReMatId(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2ReMatIdMap[virtReg]; - } - /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); @@ -264,250 +177,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - int assignVirtReMatId(unsigned virtReg); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - void assignVirtReMatId(unsigned virtReg, int id); - - /// @brief returns true if the specified virtual register is being - /// re-materialized. - bool isReMaterialized(unsigned virtReg) const { - return ReMatMap[virtReg] != NULL; - } - - /// @brief returns the original machine instruction being re-issued - /// to re-materialize the specified virtual register. 
- MachineInstr *getReMaterializedMI(unsigned virtReg) const { - return ReMatMap[virtReg]; - } - - /// @brief records the specified virtual register will be - /// re-materialized and the original instruction which will be re-issed - /// for this purpose. If parameter all is true, then all uses of the - /// registers are rematerialized and it's safe to delete the definition. - void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) { - ReMatMap[virtReg] = def; - } - - /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, SlotIndex index) { - Virt2SplitKillMap[virtReg] = index; - } - - SlotIndex getKillPoint(unsigned virtReg) const { - return Virt2SplitKillMap[virtReg]; - } - - /// @brief remove the last use (kill) of a split virtual register. - void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = SlotIndex(); - } - - /// @brief returns true if the specified MachineInstr is a spill point. - bool isSpillPt(MachineInstr *Pt) const { - return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be spilled due to - /// splitting right after the specified MachineInstr. - std::vector > &getSpillPtSpills(MachineInstr *Pt) { - return SpillPt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a spill point for virtReg. - void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) { - std::map > >::iterator - I = SpillPt2VirtMap.find(Pt); - if (I != SpillPt2VirtMap.end()) - I->second.push_back(std::make_pair(virtReg, isKill)); - else { - std::vector > Virts; - Virts.push_back(std::make_pair(virtReg, isKill)); - SpillPt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer spill point information from one instruction to - /// another. - void transferSpillPts(MachineInstr *Old, MachineInstr *New) { - std::map > >::iterator - I = SpillPt2VirtMap.find(Old); - if (I == SpillPt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back().first; - bool isKill = I->second.back().second; - I->second.pop_back(); - addSpillPoint(virtReg, isKill, New); - } - SpillPt2VirtMap.erase(I); - } - - /// @brief returns true if the specified MachineInstr is a restore point. - bool isRestorePt(MachineInstr *Pt) const { - return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be restoreed due to - /// splitting right after the specified MachineInstr. - std::vector &getRestorePtRestores(MachineInstr *Pt) { - return RestorePt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a restore point for virtReg. - void addRestorePoint(unsigned virtReg, MachineInstr *Pt) { - std::map >::iterator I = - RestorePt2VirtMap.find(Pt); - if (I != RestorePt2VirtMap.end()) - I->second.push_back(virtReg); - else { - std::vector Virts; - Virts.push_back(virtReg); - RestorePt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer restore point information from one instruction to - /// another. 
- void transferRestorePts(MachineInstr *Old, MachineInstr *New) { - std::map >::iterator I = - RestorePt2VirtMap.find(Old); - if (I == RestorePt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addRestorePoint(virtReg, New); - } - RestorePt2VirtMap.erase(I); - } - - /// @brief records that the specified physical register must be spilled - /// around the specified machine instr. - void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) { - if (EmergencySpillMap.find(MI) != EmergencySpillMap.end()) - EmergencySpillMap[MI].push_back(PhysReg); - else { - std::vector PhysRegs; - PhysRegs.push_back(PhysReg); - EmergencySpillMap.insert(std::make_pair(MI, PhysRegs)); - } - } - - /// @brief returns true if one or more physical registers must be spilled - /// around the specified instruction. - bool hasEmergencySpills(MachineInstr *MI) const { - return EmergencySpillMap.find(MI) != EmergencySpillMap.end(); - } - - /// @brief returns the physical registers to be spilled and restored around - /// the instruction. - std::vector &getEmergencySpills(MachineInstr *MI) { - return EmergencySpillMap[MI]; - } - - /// @brief - transfer emergency spill information from one instruction to - /// another. - void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) { - std::map >::iterator I = - EmergencySpillMap.find(Old); - if (I == EmergencySpillMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addEmergencySpill(virtReg, New); - } - EmergencySpillMap.erase(I); - } - - /// @brief return or get a emergency spill slot for the register class. - int getEmergencySpillSlot(const TargetRegisterClass *RC); - - /// @brief Return lowest spill slot index. - int getLowSpillSlot() const { - return LowSpillSlot; - } - - /// @brief Return highest spill slot index. - int getHighSpillSlot() const { - return HighSpillSlot; - } - - /// @brief Records a spill slot use. - void addSpillSlotUse(int FrameIndex, MachineInstr *MI); - - /// @brief Returns true if spill slot has been used. - bool isSpillSlotUsed(int FrameIndex) const { - assert(FrameIndex >= 0 && "Spill slot index should not be negative!"); - return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); - } - - /// @brief Mark the specified register as being implicitly defined. - void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - - /// @brief Returns true if the virtual register is implicitly defined. - bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; - } - - /// @brief Updates information about the specified virtual register's value - /// folded into newMI machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI, - ModRef MRInfo); - - /// @brief Updates information about the specified virtual register's value - /// folded into the specified machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo); - - /// @brief returns the virtual registers' values folded in memory - /// operands of this instruction - std::pair - getFoldedVirts(MachineInstr* MI) const { - return MI2VirtMap.equal_range(MI); - } - - /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the - /// the folded instruction map and spill point map. 
- void RemoveMachineInstrFromMaps(MachineInstr *MI); - - /// FindUnusedRegisters - Gather a list of allocatable registers that - /// have not been allocated to any virtual register. - bool FindUnusedRegisters(LiveIntervals* LIs); - - /// HasUnusedRegisters - Return true if there are any allocatable registers - /// that have not been allocated to any virtual register. - bool HasUnusedRegisters() const { - return !UnusedRegs.none(); - } - - /// setRegisterUsed - Remember the physical register is now used. - void setRegisterUsed(unsigned Reg) { - UnusedRegs.reset(Reg); - } - - /// isRegisterUnused - Return true if the physical register has not been - /// used. - bool isRegisterUnused(unsigned Reg) const { - return UnusedRegs[Reg]; - } - - /// getFirstUnusedRegister - Return the first physical register that has not - /// been used. - unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) { - int Reg = UnusedRegs.find_first(); - while (Reg != -1) { - if (allocatableRCRegs[RC][Reg]) - return (unsigned)Reg; - Reg = UnusedRegs.find_next(Reg); - } - return 0; - } - /// rewrite - Rewrite all instructions in MF to use only physical registers /// by mapping all virtual register operands to their assigned physical /// registers. diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp deleted file mode 100644 index a5ec797b27db..000000000000 --- a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp +++ /dev/null @@ -1,2633 +0,0 @@ -//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "virtregrewriter" -#include "VirtRegRewriter.h" -#include "VirtRegMap.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumDSE , "Number of dead stores elided"); -STATISTIC(NumDSS , "Number of dead spill slots removed"); -STATISTIC(NumCommutes, "Number of instructions commuted"); -STATISTIC(NumDRM , "Number of re-materializable defs elided"); -STATISTIC(NumStores , "Number of stores added"); -STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omitted"); -STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); -STATISTIC(NumCopified, "Number of available reloads turned into copies"); -STATISTIC(NumReMats , "Number of re-materialization"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumReused , "Number of values reused"); -STATISTIC(NumDCE , "Number of copies elided"); -STATISTIC(NumSUnfold , "Number of stores unfolded"); -STATISTIC(NumModRefUnfold, "Number of modref unfolded"); - -namespace { - enum RewriterName { local, trivial }; -} - -static cl::opt -RewriterOpt("rewriter", - cl::desc("Rewriter to use (default=local)"), - cl::Prefix, - cl::values(clEnumVal(local, "local 
rewriter"), - clEnumVal(trivial, "trivial rewriter"), - clEnumValEnd), - cl::init(local)); - -static cl::opt -ScheduleSpills("schedule-spills", - cl::desc("Schedule spill code"), - cl::init(false)); - -VirtRegRewriter::~VirtRegRewriter() {} - -/// substitutePhysReg - Replace virtual register in MachineOperand with a -/// physical register. Do the right thing with the sub-register index. -/// Note that operands may be added, so the MO reference is no longer valid. -static void substitutePhysReg(MachineOperand &MO, unsigned Reg, - const TargetRegisterInfo &TRI) { - if (MO.getSubReg()) { - MO.substPhysReg(Reg, TRI); - - // Any kill flags apply to the full virtual register, so they also apply to - // the full physical register. - // We assume that partial defs have already been decorated with a super-reg - // operand by LiveIntervals. - MachineInstr &MI = *MO.getParent(); - if (MO.isUse() && !MO.isUndef() && - (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) - MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); - } else { - MO.setReg(Reg); - } -} - -namespace { - -/// This class is intended for use with the new spilling framework only. It -/// rewrites vreg def/uses to use the assigned preg, but does not insert any -/// spill code. -struct TrivialRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); - DEBUG(dbgs() << "**** Machine Instrs" - << "(NOTE! Does not include spills and reloads!) ****\n"); - DEBUG(MF.dump()); - - MachineRegisterInfo *mri = &MF.getRegInfo(); - const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo(); - - bool changed = false; - - for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = li->reg; - - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - if (!li->empty()) - mri->setPhysRegUsed(reg); - } - else { - if (!VRM.hasPhys(reg)) - continue; - unsigned pReg = VRM.getPhys(reg); - mri->setPhysRegUsed(pReg); - // Copy the register use-list before traversing it. - SmallVector, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), - E = mri->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - for (unsigned N=0; N != reglist.size(); ++N) - substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), - pReg, *tri); - changed |= !reglist.empty(); - } - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); - - return changed; - } - -}; - -} - -// ************************************************************************ // - -namespace { - -/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB -/// from top down, keep track of which spill slots or remat are available in -/// each register. -/// -/// Note that not all physregs are created equal here. In particular, some -/// physregs are reloads that we are allowed to clobber or ignore at any time. -/// Other physregs are values that the register allocated program is using -/// that we cannot CHANGE, but we can read if we like. We keep track of this -/// on a per-stack-slot / remat id basis as the low bit in the value of the -/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks -/// this bit and addAvailable sets it if. 
-class AvailableSpills { - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled - // or remat'ed virtual register values that are still available, due to - // being loaded or stored to, but not invalidated yet. - std::map SpillSlotsOrReMatsAvailable; - - // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable, - // indicating which stack slot values are currently held by a physreg. This - // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a - // physreg is modified. - std::multimap PhysRegsAvailable; - - void disallowClobberPhysRegOnly(unsigned PhysReg); - - void ClobberPhysRegOnly(unsigned PhysReg); -public: - AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii) - : TRI(tri), TII(tii) { - } - - /// clear - Reset the state. - void clear() { - SpillSlotsOrReMatsAvailable.clear(); - PhysRegsAvailable.clear(); - } - - const TargetRegisterInfo *getRegInfo() const { return TRI; } - - /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is - /// available in a physical register, return that PhysReg, otherwise - /// return 0. - unsigned getSpillSlotOrReMatPhysReg(int Slot) const { - std::map::const_iterator I = - SpillSlotsOrReMatsAvailable.find(Slot); - if (I != SpillSlotsOrReMatsAvailable.end()) { - return I->second >> 1; // Remove the CanClobber bit. - } - return 0; - } - - /// addAvailable - Mark that the specified stack slot / remat is available - /// in the specified physreg. If CanClobber is true, the physreg can be - /// modified at any time without changing the semantics of the program. - void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) { - // If this stack slot is thought to be available in some other physreg, - // remove its record. - ModifyStackSlotOrReMat(SlotOrReMat); - - PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat)); - SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) | - (unsigned)CanClobber; - - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Remembering RM#" - << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); - DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) - << (CanClobber ? " canclobber" : "") << "\n"); - } - - /// canClobberPhysRegForSS - Return true if the spiller is allowed to change - /// the value of the specified stackslot register if it desires. The - /// specified stack slot must be available in a physreg for this query to - /// make sense. - bool canClobberPhysRegForSS(int SlotOrReMat) const { - assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) && - "Value not available!"); - return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1; - } - - /// canClobberPhysReg - Return true if the spiller is allowed to clobber the - /// physical register where values for some stack slot(s) might be - /// available. - bool canClobberPhysReg(unsigned PhysReg) const { - std::multimap::const_iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - if (!canClobberPhysRegForSS(SlotOrReMat)) - return false; - } - return true; - } - - /// disallowClobberPhysReg - Unset the CanClobber bit of the specified - /// stackslot register. The register is still available but is no longer - /// allowed to be modifed. 
- void disallowClobberPhysReg(unsigned PhysReg); - - /// ClobberPhysReg - This is called when the specified physreg changes - /// value. We use this to invalidate any info about stuff that lives in - /// it and any of its aliases. - void ClobberPhysReg(unsigned PhysReg); - - /// ModifyStackSlotOrReMat - This method is called when the value in a stack - /// slot changes. This removes information about which register the - /// previous value for this slot lives in (as the previous value is dead - /// now). - void ModifyStackSlotOrReMat(int SlotOrReMat); - - /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, - /// other stack slots sharing the same register are no longer valid. - void ClobberSharingStackSlots(int StackSlot); - - /// AddAvailableRegsToLiveIn - Availability information is being kept coming - /// into the specified MBB. Add available physical registers as potential - /// live-in's. If they are reused in the MBB, they will be added to the - /// live-in set to make register scavenger and post-allocation scheduler. - void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills, - std::vector &KillOps); -}; - -} - -// ************************************************************************ // - -// Given a location where a reload of a spilled register or a remat of -// a constant is to be inserted, attempt to find a safe location to -// insert the load at an earlier point in the basic-block, to hide -// latency of the load and to avoid address-generation interlock -// issues. -static MachineBasicBlock::iterator -ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, - MachineBasicBlock::iterator const Begin, - unsigned PhysReg, - const TargetRegisterInfo *TRI, - bool DoReMat, - int SSorRMId, - const TargetInstrInfo *TII, - const MachineFunction &MF) -{ - if (!ScheduleSpills) - return InsertLoc; - - // Spill backscheduling is of primary interest to addresses, so - // don't do anything if the register isn't in the register class - // used for pointers. - - const TargetLowering *TL = MF.getTarget().getTargetLowering(); - - if (!TL->isTypeLegal(TL->getPointerTy())) - // Believe it or not, this is true on 16-bit targets like PIC16. - return InsertLoc; - - const TargetRegisterClass *ptrRegClass = - TL->getRegClassFor(TL->getPointerTy()); - if (!ptrRegClass->contains(PhysReg)) - return InsertLoc; - - // Scan upwards through the preceding instructions. If an instruction doesn't - // reference the stack slot or the register we're loading, we can - // backschedule the reload up past it. - MachineBasicBlock::iterator NewInsertLoc = InsertLoc; - while (NewInsertLoc != Begin) { - MachineBasicBlock::iterator Prev = prior(NewInsertLoc); - for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { - MachineOperand &Op = Prev->getOperand(i); - if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) - goto stop; - } - if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || - Prev->findRegisterDefOperand(PhysReg)) - goto stop; - for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) - if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || - Prev->findRegisterDefOperand(*Alias)) - goto stop; - NewInsertLoc = Prev; - } -stop:; - - // If we made it to the beginning of the block, turn around and move back - // down just past any existing reloads. They're likely to be reloads/remats - // for instructions earlier than what our current reload/remat is for, so - // they should be scheduled earlier. 
- if (NewInsertLoc == Begin) { - int FrameIdx; - while (InsertLoc != NewInsertLoc && - (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || - TII->isTriviallyReMaterializable(NewInsertLoc))) - ++NewInsertLoc; - } - - return NewInsertLoc; -} - -namespace { - -// ReusedOp - For each reused operand, we keep track of a bit of information, -// in case we need to rollback upon processing a new operand. See comments -// below. -struct ReusedOp { - // The MachineInstr operand that reused an available value. - unsigned Operand; - - // StackSlotOrReMat - The spill slot or remat id of the value being reused. - unsigned StackSlotOrReMat; - - // PhysRegReused - The physical register the value was available in. - unsigned PhysRegReused; - - // AssignedPhysReg - The physreg that was assigned for use by the reload. - unsigned AssignedPhysReg; - - // VirtReg - The virtual register itself. - unsigned VirtReg; - - ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr, - unsigned vreg) - : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr), - AssignedPhysReg(apr), VirtReg(vreg) {} -}; - -/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that -/// is reused instead of reloaded. -class ReuseInfo { - MachineInstr &MI; - std::vector Reuses; - BitVector PhysRegsClobbered; -public: - ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) { - PhysRegsClobbered.resize(tri->getNumRegs()); - } - - bool hasReuses() const { - return !Reuses.empty(); - } - - /// addReuse - If we choose to reuse a virtual register that is already - /// available instead of reloading it, remember that we did so. - void addReuse(unsigned OpNo, unsigned StackSlotOrReMat, - unsigned PhysRegReused, unsigned AssignedPhysReg, - unsigned VirtReg) { - // If the reload is to the assigned register anyway, no undo will be - // required. - if (PhysRegReused == AssignedPhysReg) return; - - // Otherwise, remember this. - Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, - AssignedPhysReg, VirtReg)); - } - - void markClobbered(unsigned PhysReg) { - PhysRegsClobbered.set(PhysReg); - } - - bool isClobbered(unsigned PhysReg) const { - return PhysRegsClobbered.test(PhysReg); - } - - /// GetRegForReload - We are about to emit a reload into PhysReg. If there - /// is some other operand that is using the specified register, either pick - /// a new register to use, or evict the previous reload and use this reg. - unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, - MachineFunction &MF, MachineInstr *MI, - AvailableSpills &Spills, - std::vector &MaybeDeadStores, - SmallSet &Rejected, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM); - - /// GetRegForReload - Helper for the above GetRegForReload(). Add a - /// 'Rejected' set to remember which registers have been considered and - /// rejected for the reload. This avoids infinite looping in case like - /// this: - /// t1 := op t2, t3 - /// t2 <- assigned r0 for use by the reload but ended up reuse r1 - /// t3 <- assigned r1 for use by the reload but ended up reuse r0 - /// t1 <- desires r1 - /// sees r1 is taken by t2, tries t2's reload register r0 - /// sees r0 is taken by t3, tries t3's reload register r1 - /// sees r1 is taken by t2, tries t2's reload register r0 ... 
- unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, - std::vector &MaybeDeadStores, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM) { - SmallSet Rejected; - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); - return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } -}; - -} - -// ****************** // -// Utility Functions // -// ****************** // - -/// findSinglePredSuccessor - Return via reference a vector of machine basic -/// blocks each of which is a successor of the specified BB and has no other -/// predecessor. -static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl &Succs){ - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->pred_size() == 1) - Succs.push_back(SuccMBB); - } -} - -/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed -/// but not re-defined and it's being reused. Remove the kill flag for the -/// register and unset the kill's marker and last kill operand. -static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector &KillOps) { - DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n"); - - MachineOperand *KillOp = KillOps[Reg]; - KillOp->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOp->getReg(); - if (!RegKills[KReg]) - return; - - assert(KillOps[KReg]->getParent() == KillOp->getParent() && - "invalid superreg kill flags"); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // If it's a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - - assert(KillOps[*SR]->getParent() == KillOp->getParent() && - "invalid subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } -} - -/// ResurrectKill - Invalidate kill info associated with a previous MI. An -/// optimization may have decided that it's safe to reuse a previously killed -/// register. If we fail to erase the invalid kill flags, then the register -/// scavenger may later clobber the register used by this MI. Note that this -/// must be done even if this MI is being deleted! Consider: -/// -/// USE $r1 (vreg1) -/// ... -/// $r1(vreg3) = COPY $r1 (vreg2) -/// -/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially -/// vreg1's only use is a kill. The rewriter doesn't know it should be live -/// until it rewrites vreg2. At that points it sees that the copy is dead and -/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 -/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at -/// vreg1 before deleting the copy. -static void ResurrectKill(MachineInstr &MI, unsigned Reg, - const TargetRegisterInfo* TRI, BitVector &RegKills, - std::vector &KillOps) { - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - return; - } - // No previous kill for this reg. Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... - // = s8 - // ... 
- // = d4 - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) - ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); - } -} - -/// InvalidateKills - MI is going to be deleted. If any of its operands are -/// marked kill, then invalidate the information. -static void InvalidateKills(MachineInstr &MI, - const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector &KillOps, - SmallVector *KillRegs = NULL) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (KillRegs) - KillRegs->push_back(Reg); - assert(Reg < KillOps.size()); - if (KillOps[Reg] == &MO) { - // This operand was the kill, now no longer. - KillOps[Reg] = NULL; - RegKills.reset(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - if (RegKills[*SR]) { - assert(KillOps[*SR] == &MO && "bad subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } - } - else { - // This operand may have reused a previously killed reg. Keep it live in - // case it continues to be used after erasing this instruction. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - } - } -} - -/// InvalidateRegDef - If the def operand of the specified def MI is now dead -/// (since its spill instruction is removed), mark it isDead. Also checks if -/// the def MI has other definition operands that are not dead. Returns it by -/// reference. -static bool InvalidateRegDef(MachineBasicBlock::iterator I, - MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef, - const TargetRegisterInfo *TRI) { - // Due to remat, it's possible this reg isn't being reused. That is, - // the def of this reg (by prev MI) is now dead. - MachineInstr *DefMI = I; - MachineOperand *DefOp = NULL; - for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) - continue; - if (MO.getReg() == Reg) - DefOp = &MO; - else if (!MO.isDead()) - HasLiveDef = true; - } - if (!DefOp) - return false; - - bool FoundUse = false, Done = false; - MachineBasicBlock::iterator E = &NewDef; - ++I; ++E; - for (; !Done && I != E; ++I) { - MachineInstr *NMI = I; - for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() == 0 || - (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) - continue; - if (MO.isUse()) - FoundUse = true; - Done = true; // Stop after scanning all the operands of this MI. - } - } - if (!FoundUse) { - // Def is dead! - DefOp->setIsDead(); - return true; - } - return false; -} - -/// UpdateKills - Track and update kill info. If a MI reads a register that is -/// marked kill, then it must be due to register reuse. Transfer the kill info -/// over. -static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector &KillOps) { - // These do not affect kill info at all. - if (MI.isDebugValue()) - return; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - // This operand may have reused a previously killed reg. 
Keep it live. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - - if (MO.isKill()) { - RegKills.set(Reg); - KillOps[Reg] = &MO; - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.set(*SR); - KillOps[*SR] = &MO; - } - } - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - RegKills.reset(Reg); - KillOps[Reg] = NULL; - // It also defines (or partially define) aliases. - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - } -} - -/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. -/// -static void ReMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - unsigned DestReg, unsigned Reg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); -#ifndef NDEBUG - const MCInstrDesc &MCID = ReMatDefMI->getDesc(); - assert(MCID.getNumDefs() == 1 && - "Don't know how to remat instructions that define > 1 values!"); -#endif - TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); - MachineInstr *NewMI = prior(MII); - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) - continue; - assert(MO.isUse()); - unsigned Phys = VRM.getPhys(VirtReg); - assert(Phys && "Virtual register is not assigned a register?"); - substitutePhysReg(MO, Phys, *TRI); - } - ++NumReMats; -} - -/// findSuperReg - Find the SubReg's super-register of given register class -/// where its SubIdx sub-register is SubReg. -static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg, - unsigned SubIdx, const TargetRegisterInfo *TRI) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - unsigned Reg = *I; - if (TRI->getSubReg(Reg, SubIdx) == SubReg) - return Reg; - } - return 0; -} - -// ******************************** // -// Available Spills Implementation // -// ******************************** // - -/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified -/// stackslot register. The register is still available but is no longer -/// allowed to be modifed. -void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { - std::multimap::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"); - } -} - -/// disallowClobberPhysReg - Unset the CanClobber bit of the specified -/// stackslot register and its aliases. The register and its aliases may -/// still available but is no longer allowed to be modifed. 
-void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - disallowClobberPhysRegOnly(*AS); - disallowClobberPhysRegOnly(PhysReg); -} - -/// ClobberPhysRegOnly - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in it. -void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { - std::multimap::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - PhysRegsAvailable.erase(I++); - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "); - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); - else - DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); - } -} - -/// ClobberPhysReg - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in -/// it and any of its aliases. -void AvailableSpills::ClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - ClobberPhysRegOnly(*AS); - ClobberPhysRegOnly(PhysReg); -} - -/// AddAvailableRegsToLiveIn - Availability information is being kept coming -/// into the specified MBB. Add available physical registers as potential -/// live-in's. If they are reused in the MBB, they will be added to the -/// live-in set to make register scavenger and post-allocation scheduler. -void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, - BitVector &RegKills, - std::vector &KillOps) { - std::set NotAvailable; - for (std::multimap::iterator - I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); - I != E; ++I) { - unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); - // FIXME: A temporary workaround. We can't reuse available value if it's - // not safe to move the def of the virtual register's class. e.g. - // X86::RFP* register classes. Do not add it as a live-in. - if (!TII->isSafeToMoveRegClassDefs(RC)) - // This is no longer available. - NotAvailable.insert(Reg); - else { - MBB.addLiveIn(Reg); - if (RegKills[Reg]) - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - } - - // Skip over the same register. - std::multimap::iterator NI = llvm::next(I); - while (NI != E && NI->first == Reg) { - ++I; - ++NI; - } - } - - for (std::set::iterator I = NotAvailable.begin(), - E = NotAvailable.end(); I != E; ++I) { - ClobberPhysReg(*I); - for (const unsigned *SubRegs = TRI->getSubRegisters(*I); - *SubRegs; ++SubRegs) - ClobberPhysReg(*SubRegs); - } -} - -/// ModifyStackSlotOrReMat - This method is called when the value in a stack -/// slot changes. This removes information about which register the previous -/// value for this slot lives in (as the previous value is dead now). -void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { - std::map::iterator It = - SpillSlotsOrReMatsAvailable.find(SlotOrReMat); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - SpillSlotsOrReMatsAvailable.erase(It); - - // This register may hold the value of multiple stack slots, only remove this - // stack slot from the set of values the register contains. 
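The two containers used above stay in sync: a map from spill slot (or remat id) to (PhysReg << 1) | CanClobber, and a multimap from physreg back to every slot it currently holds. A compact standard-library model of that invariant (the class and method names here are illustrative, not the AvailableSpills interface):

#include <cassert>
#include <map>
#include <utility>

class ToySpillAvailability {
  std::map<int, unsigned> SlotToReg;        // slot -> (reg << 1) | canClobber
  std::multimap<unsigned, int> RegToSlots;  // reg  -> every slot it holds
public:
  void addAvailable(int Slot, unsigned Reg, bool CanClobber = true) {
    SlotToReg[Slot] = (Reg << 1) | (CanClobber ? 1u : 0u);
    RegToSlots.insert(std::make_pair(Reg, Slot));
  }
  // The physreg was overwritten: every slot it held becomes unavailable.
  void clobberPhysReg(unsigned Reg) {
    std::multimap<unsigned, int>::iterator I = RegToSlots.lower_bound(Reg);
    while (I != RegToSlots.end() && I->first == Reg) {
      assert((SlotToReg[I->second] >> 1) == Reg && "maps out of sync");
      SlotToReg.erase(I->second);
      RegToSlots.erase(I++);
    }
  }
  // The slot was overwritten: forget only this slot; the register may still
  // hold the values of other slots.
  void modifySlot(int Slot) {
    std::map<int, unsigned>::iterator It = SlotToReg.find(Slot);
    if (It == SlotToReg.end()) return;
    unsigned Reg = It->second >> 1;
    SlotToReg.erase(It);
    for (std::multimap<unsigned, int>::iterator I = RegToSlots.lower_bound(Reg);
         I != RegToSlots.end() && I->first == Reg; ++I)
      if (I->second == Slot) { RegToSlots.erase(I); break; }
  }
};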
- std::multimap::iterator I = PhysRegsAvailable.lower_bound(Reg); - for (; ; ++I) { - assert(I != PhysRegsAvailable.end() && I->first == Reg && - "Map inverse broken!"); - if (I->second == SlotOrReMat) break; - } - PhysRegsAvailable.erase(I); -} - -void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { - std::map::iterator It = - SpillSlotsOrReMatsAvailable.find(StackSlot); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - - // Erase entries in PhysRegsAvailable for other stack slots. - std::multimap::iterator I = PhysRegsAvailable.lower_bound(Reg); - while (I != PhysRegsAvailable.end() && I->first == Reg) { - std::multimap::iterator NextI = llvm::next(I); - if (I->second != StackSlot) { - DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " - << PrintReg(Reg, TRI) << '\n'); - SpillSlotsOrReMatsAvailable.erase(I->second); - PhysRegsAvailable.erase(I); - } - I = NextI; - } -} - -// ************************** // -// Reuse Info Implementation // -// ************************** // - -/// GetRegForReload - We are about to emit a reload into PhysReg. If there -/// is some other operand that is using the specified register, either pick -/// a new register to use, or evict the previous reload and use this reg. -unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, - unsigned PhysReg, - MachineFunction &MF, - MachineInstr *MI, AvailableSpills &Spills, - std::vector &MaybeDeadStores, - SmallSet &Rejected, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM) { - const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - - if (Reuses.empty()) return PhysReg; // This is most often empty. - - for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { - ReusedOp &Op = Reuses[ro]; - // If we find some other reuse that was supposed to use this register - // exactly for its reload, we can change this reload to use ITS reload - // register. That is, unless its reload register has already been - // considered and subsequently rejected because it has also been reused - // by another operand. - if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0 && - RC->contains(Op.AssignedPhysReg)) { - // Yup, use the reload register that we didn't use before. - unsigned NewReg = Op.AssignedPhysReg; - Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } else { - // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload - // and use this one. - unsigned PRRU = Op.PhysRegReused; - if (TRI->regsOverlap(PRRU, PhysReg)) { - // Okay, we found out that an alias of a reused register - // was used. This isn't good because it means we have - // to undo a previous reuse. - MachineBasicBlock *MBB = MI->getParent(); - const TargetRegisterClass *AliasRC = - MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); - - // Copy Op out of the vector and remove it, we're going to insert an - // explicit load for it. - ReusedOp NewOp = Op; - Reuses.erase(Reuses.begin()+ro); - - // MI may be using only a sub-register of PhysRegUsed. - unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); - unsigned SubIdx = 0; - assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && - "A reuse cannot be a virtual register"); - if (PRRU != RealPhysRegUsed) { - // What was the sub-register index? 
- SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); - assert(SubIdx && - "Operand physreg is not a sub-register of PhysRegUsed"); - } - - // Ok, we're going to try to reload the assigned physreg into the - // slot that we were supposed to in the first place. However, that - // register could hold a reuse. Check to see if it conflicts or - // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, - MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; - int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, - DoReMat, SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, - TRI, VRM); - } else { - TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, - NewOp.StackSlotOrReMat, AliasRC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); - // Any stores to this stack slot are not dead anymore. - MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; - ++NumLoads; - } - Spills.ClobberPhysReg(NewPhysReg); - Spills.ClobberPhysReg(NewOp.PhysRegReused); - - unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg; - MI->getOperand(NewOp.Operand).setReg(RReg); - MI->getOperand(NewOp.Operand).setSubReg(0); - - Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - - DEBUG(dbgs() << "Reuse undone!\n"); - --NumReused; - - // Finally, PhysReg is now available, go ahead and use it. - return PhysReg; - } - } - } - return PhysReg; -} - -// ************************************************************************ // - -/// FoldsStackSlotModRef - Return true if the specified MI folds the specified -/// stack slot mod/ref. It also checks if it's possible to unfold the -/// instruction by having it define a specified physical register instead. -static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI)) - return false; - - bool Found = false; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { - unsigned VirtReg = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - if (MR & VirtRegMap::isModRef) - if (VRM.getStackSlot(VirtReg) == SS) { - Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0; - break; - } - } - if (!Found) - return false; - - // Does the instruction uses a register that overlaps the scratch register? - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (!VRM.hasPhys(Reg)) - continue; - Reg = VRM.getPhys(Reg); - } - if (TRI->regsOverlap(PhysReg, Reg)) - return false; - } - return true; -} - -/// FindFreeRegister - Find a free register of a given register class by looking -/// at (at most) the last two machine instructions. 
-static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, - MachineBasicBlock &MBB, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - BitVector &AllocatableRegs) { - BitVector Defs(TRI->getNumRegs()); - BitVector Uses(TRI->getNumRegs()); - SmallVector LocalUses; - SmallVector Kills; - - // Take a look at 2 instructions at most. - unsigned Count = 0; - while (Count < 2) { - if (MII == MBB.begin()) - break; - MachineInstr *PrevMI = prior(MII); - MII = PrevMI; - - if (PrevMI->isDebugValue()) - continue; // Skip over dbg_value instructions. - ++Count; - - for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = PrevMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - Defs.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.set(*AS); - } else { - LocalUses.push_back(Reg); - if (MO.isKill() && AllocatableRegs[Reg]) - Kills.push_back(Reg); - } - } - - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned Kill = Kills[i]; - if (!Defs[Kill] && !Uses[Kill] && - RC->contains(Kill)) - return Kill; - } - for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { - unsigned Reg = LocalUses[i]; - Uses.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.set(*AS); - } - } - - return 0; -} - -static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, - const TargetRegisterInfo &TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == VirtReg) - substitutePhysReg(MO, PhysReg, TRI); - } -} - -namespace { - -struct RefSorter { - bool operator()(const std::pair &A, - const std::pair &B) { - return A.second < B.second; - } -}; - -// ***************************** // -// Local Spiller Implementation // -// ***************************** // - -class LocalRewriter : public VirtRegRewriter { - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - VirtRegMap *VRM; - LiveIntervals *LIs; - BitVector AllocatableRegs; - DenseMap DistanceMap; - DenseMap > Slot2DbgValues; - - MachineBasicBlock *MBB; // Basic block currently being processed. 
- -public: - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs); - -private: - void EraseInstr(MachineInstr *MI) { - VRM->RemoveMachineInstrFromMaps(MI); - LIs->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } - - bool OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps); - - bool OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps); - - bool CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps, - const TargetRegisterInfo *TRI); - - void SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet &ReMatDefs, - BitVector &RegKills, - std::vector &KillOps); - - void TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector &KillOps); - - bool InsertEmergencySpills(MachineInstr *MI); - - bool InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps); - - bool InsertSpills(MachineInstr *MI); - - void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector &KillOps); - - void RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector &KillOps); -}; -} - -bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* lis) { - MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); - VRM = &vrm; - LIs = lis; - AllocatableRegs = TRI->getAllocatableSet(MF); - DEBUG(dbgs() << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"); - DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" - " reloads!) ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Spills - Keep track of which spilled values are available in physregs - // so that we can choose to reuse the physregs instead of emitting - // reloads. This is usually refreshed per basic block. - AvailableSpills Spills(TRI, TII); - - // Keep track of kill information. - BitVector RegKills(TRI->getNumRegs()); - std::vector KillOps; - KillOps.resize(TRI->getNumRegs(), NULL); - - // SingleEntrySuccs - Successor blocks which have a single predecessor. - SmallVector SinglePredSuccs; - SmallPtrSet EarlyVisited; - - // Traverse the basic blocks depth first. - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet Visited; - for (df_ext_iterator > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MBB = *DFI; - if (!EarlyVisited.count(MBB)) - RewriteMBB(LIs, Spills, RegKills, KillOps); - - // If this MBB is the only predecessor of a successor. Keep the - // availability information and visit it next. - do { - // Keep visiting single predecessor successor as long as possible. - SinglePredSuccs.clear(); - findSinglePredSuccessor(MBB, SinglePredSuccs); - if (SinglePredSuccs.empty()) - MBB = 0; - else { - // FIXME: More than one successors, each of which has MBB has - // the only predecessor. 
- MBB = SinglePredSuccs[0]; - if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) { - Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps); - RewriteMBB(LIs, Spills, RegKills, KillOps); - } - } - } while (MBB); - - // Clear the availability info. - Spills.clear(); - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Mark unused spill slots. - MachineFrameInfo *MFI = MF.getFrameInfo(); - int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) { - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { - SmallVector &DbgValues = Slot2DbgValues[SS]; - if (!VRM->isSpillSlotUsed(SS)) { - MFI->RemoveStackObject(SS); - for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { - MachineInstr *DVMI = DbgValues[j]; - DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - EraseInstr(DVMI); - } - ++NumDSS; - } - DbgValues.clear(); - } - } - Slot2DbgValues.clear(); - - return true; -} - -/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if -/// a scratch register is available. -/// xorq %r12, %r13 -/// addq %rax, -184(%rbp) -/// addq %r13, -184(%rbp) -/// ==> -/// xorq %r12, %r13 -/// movq -184(%rbp), %r12 -/// addq %rax, %r12 -/// addq %r13, %r12 -/// movq %r12, -184(%rbp) -bool LocalRewriter:: -OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps) { - - MachineBasicBlock::iterator NextMII = llvm::next(MII); - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - return false; - - if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0) - return false; - - // Now let's see if the last couple of instructions happens to have freed up - // a register. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs); - if (!PhysReg) - return false; - - MachineFunction &MF = *MBB->getParent(); - TRI = MF.getTarget().getRegisterInfo(); - MachineInstr &MI = *MII; - if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // If the next instruction also folds the same SS modref and can be unfoled, - // then it's worthwhile to issue a load from SS into the free register and - // then unfold these instructions. - if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // Back-schedule reloads and remats. - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF); - - // Load from SS to the spare physical register. - TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI); - // This invalidates Phys. - Spills.ClobberPhysReg(PhysReg); - // Remember it's available. - Spills.addAvailable(SS, PhysReg); - MaybeDeadStores[SS] = NULL; - - // Unfold current MI. - SmallVector NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&MI, NewMIs[0]); - MII = MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - ++NumModRefUnfold; - - // Unfold next instructions that fold the same SS. 
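The rewrite performed here has a simple shape: one reload of the slot into the scratch register, the same arithmetic applied to that register, then one store back. A toy illustration of that shape (the SlotRMW record and textual output are invented for the example; the real code operates on MachineInstrs via unfoldMemoryOperand and storeRegToStackSlot):

#include <cstddef>
#include <string>
#include <vector>

// A read-modify-write instruction that folds the stack slot, e.g. the
// "addq %rax, -184(%rbp)" form from the comment above.
struct SlotRMW {
  std::string Opcode;
  std::string SrcReg;
};

// Rewrite a run of such instructions into load + register ops + store.
static std::vector<std::string>
unfoldRun(const std::vector<SlotRMW> &Run, const std::string &Scratch,
          const std::string &Slot) {
  std::vector<std::string> Out;
  Out.push_back("mov " + Slot + ", " + Scratch);            // one reload
  for (std::size_t i = 0; i < Run.size(); ++i)
    Out.push_back(Run[i].Opcode + " " + Run[i].SrcReg + ", " + Scratch);
  Out.push_back("mov " + Scratch + ", " + Slot);            // one spill
  return Out;
}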
- do { - MachineInstr &NextMI = *NextMII; - NextMII = llvm::next(NextMII); - NewMIs.clear(); - if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&NextMI, NewMIs[0]); - MBB->insert(NextMII, NewMIs[0]); - InvalidateKills(NextMI, TRI, RegKills, KillOps); - EraseInstr(&NextMI); - ++NumModRefUnfold; - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - break; - } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); - - // Store the value back into SS. - TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(NextMII); - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - - return true; -} - -/// OptimizeByUnfold - Turn a store folding instruction into a load folding -/// instruction. e.g. -/// xorl %edi, %eax -/// movl %eax, -32(%ebp) -/// movl -36(%ebp), %eax -/// orl %eax, -32(%ebp) -/// ==> -/// xorl %edi, %eax -/// orl -36(%ebp), %eax -/// mov %eax, -32(%ebp) -/// This enables unfolding optimization for a subsequent instruction which will -/// also eliminate the newly introduced store instruction. -bool LocalRewriter:: -OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps) { - MachineFunction &MF = *MBB->getParent(); - MachineInstr &MI = *MII; - unsigned UnfoldedOpc = 0; - unsigned UnfoldPR = 0; - unsigned UnfoldVR = 0; - int FoldedSS = VirtRegMap::NO_STACK_SLOT; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) { - // Only transform a MI that folds a single register. - if (UnfoldedOpc) - return false; - UnfoldVR = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - // MI2VirtMap be can updated which invalidate the iterator. - // Increment the iterator first. - ++I; - if (VRM->isAssignedReg(UnfoldVR)) - continue; - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - FoldedSS = VRM->getStackSlot(UnfoldVR); - MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; - if (DeadStore && (MR & VirtRegMap::isModRef)) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); - if (!PhysReg || !DeadStore->readsRegister(PhysReg)) - continue; - UnfoldPR = PhysReg; - UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), - false, true); - } - } - - if (!UnfoldedOpc) { - if (!UnfoldVR) - return false; - - // Look for other unfolding opportunities. 
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills, - RegKills, KillOps); - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) - continue; - if (VRM->isAssignedReg(VirtReg)) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - } else if (VRM->isReMaterialized(VirtReg)) - continue; - int SS = VRM->getStackSlot(VirtReg); - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - if (PhysReg) { - if (TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - continue; - } - if (VRM->hasPhys(VirtReg)) { - PhysReg = VRM->getPhys(VirtReg); - if (!TRI->regsOverlap(PhysReg, UnfoldPR)) - continue; - } - - // Ok, we'll need to reload the value into a register which makes - // it impossible to perform the store unfolding optimization later. - // Let's see if it is possible to fold the load if the store is - // unfolded. This allows us to perform the store unfolding - // optimization. - SmallVector NewMIs; - if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { - assert(NewMIs.size() == 1); - MachineInstr *NewMI = NewMIs.back(); - MBB->insert(MII, NewMI); - NewMIs.clear(); - int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); - assert(Idx != -1); - SmallVector Ops; - Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); - NewMI->eraseFromParent(); - if (FoldedMI) { - VRM->addSpillSlotUse(SS, FoldedMI); - if (!VRM->hasPhys(UnfoldVR)) - VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = FoldedMI; - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - return true; - } - } - } - - return false; -} - -/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and -/// where SrcReg is r1 and it is tied to r0. Return true if after -/// commuting this instruction it will be r0 = op r2, r1. 
-static bool CommuteChangesDestination(MachineInstr *DefMI, - const MCInstrDesc &MCID, - unsigned SrcReg, - const TargetInstrInfo *TII, - unsigned &DstIdx) { - if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) - return false; - if (!DefMI->getOperand(1).isReg() || - DefMI->getOperand(1).getReg() != SrcReg) - return false; - unsigned DefIdx; - if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) - return false; - unsigned SrcIdx1, SrcIdx2; - if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) - return false; - if (SrcIdx1 == 1 && SrcIdx2 == 2) { - DstIdx = 2; - return true; - } - return false; -} - -/// CommuteToFoldReload - -/// Look for -/// r1 = load fi#1 -/// r1 = op r1, r2 -/// store r1, fi#1 -/// -/// If op is commutable and r2 is killed, then we can xform these to -/// r2 = op r2, fi#1 -/// store r2, fi#1 -bool LocalRewriter:: -CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps, - const TargetRegisterInfo *TRI) { - if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) - return false; - - MachineInstr &MI = *MII; - MachineBasicBlock::iterator DefMII = prior(MII); - MachineInstr *DefMI = DefMII; - const MCInstrDesc &MCID = DefMI->getDesc(); - unsigned NewDstIdx; - if (DefMII != MBB->begin() && - MCID.isCommutable() && - CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) - return false; - MachineInstr *ReloadMI = prior(DefMII); - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); - if (DestReg != SrcReg || FrameIdx != SS) - return false; - int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); - if (UseIdx == -1) - return false; - unsigned DefIdx; - if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) - return false; - assert(DefMI->getOperand(DefIdx).isReg() && - DefMI->getOperand(DefIdx).getReg() == SrcReg); - - // Now commute def instruction. - MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); - if (!CommutedMI) - return false; - MBB->insert(MII, CommutedMI); - SmallVector Ops; - Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); - // Not needed since foldMemoryOperand returns new MI. - CommutedMI->eraseFromParent(); - if (!FoldedMI) - return false; - - VRM->addSpillSlotUse(SS, FoldedMI); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - // Insert new def MI and spill MI. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); - MII = prior(MII); - MachineInstr *StoreMI = MII; - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = FoldedMI; // Update MII to backtrack. - - // Delete all 3 old instructions. - InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - EraseInstr(ReloadMI); - InvalidateKills(*DefMI, TRI, RegKills, KillOps); - EraseInstr(DefMI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - - // If NewReg was previously holding value of some SS, it's now clobbered. - // This has to be done now because it's a physical register. When this - // instruction is re-visited, it's ignored. 
- Spills.ClobberPhysReg(NewReg); - - ++NumCommutes; - return true; - } - - return false; -} - -/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if -/// the last store to the same slot is now dead. If so, remove the last store. -void LocalRewriter:: -SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet &ReMatDefs, - BitVector &RegKills, - std::vector &KillOps) { - - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC, - TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - - // If there is a dead store to this stack slot, nuke it now. - if (LastStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); - ++NumDSE; - SmallVector KillRegs; - InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); - MachineBasicBlock::iterator PrevMII = LastStore; - bool CheckDef = PrevMII != MBB->begin(); - if (CheckDef) - --PrevMII; - EraseInstr(LastStore); - if (CheckDef) { - // Look at defs of killed registers on the store. Mark the defs - // as dead since the store has been deleted and they aren't - // being reused. - for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { - bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { - MachineInstr *DeadDef = PrevMII; - if (ReMatDefs.count(DeadDef) && !HasOtherDef) { - // FIXME: This assumes a remat def does not have side effects. - EraseInstr(DeadDef); - ++NumDRM; - } - } - } - } - } - - // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume - // the last of multiple instructions is the actual store. - LastStore = prior(oldNextMII); - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register available, - // in PhysReg. - Spills.ModifyStackSlotOrReMat(StackSlot); - Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, PhysReg, isAvailable); - ++NumStores; -} - -/// isSafeToDelete - Return true if this instruction doesn't produce any side -/// effect and all of its defs are dead. -static bool isSafeToDelete(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || - MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || - MI.isLabel() || MI.isDebugValue() || - MI.hasUnmodeledSideEffects()) - return false; - - // Technically speaking inline asm without side effects and no defs can still - // be deleted. But there is so much bad inline asm code out there, we should - // let them be. - if (MI.isInlineAsm()) - return false; - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - // FIXME: We can't remove kill markers or else the scavenger will assert. - // An alternative is to add a ADD pseudo instruction to replace kill - // markers. - return false; - } - return true; -} - -/// TransferDeadness - A identity copy definition is dead and it's being -/// removed. Find the last def or use and mark it as dead / kill. 
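TransferDeadness, defined next, finds the reference of Reg that comes last in the block (using the positions recorded in DistanceMap) and moves the dead/kill marker there. A standalone model of that final step (the Ref record and flags are illustrative; the real routine may also delete a dead def and keep walking backward):

#include <vector>

// One in-block reference of the register: its position in the block and
// whether it is a def or a use.
struct Ref {
  unsigned Distance;
  bool IsDef;
  bool IsDead;
  bool IsKill;
};

// Mark the latest reference: a use becomes the new kill, a def is marked dead.
static void transferDeadness(std::vector<Ref> &Refs) {
  if (Refs.empty())
    return;
  std::vector<Ref>::iterator Last = Refs.begin();
  for (std::vector<Ref>::iterator I = Refs.begin(), E = Refs.end(); I != E; ++I)
    if (I->Distance > Last->Distance)
      Last = I;
  if (Last->IsDef)
    Last->IsDead = true;
  else
    Last->IsKill = true;
}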
-void LocalRewriter:: -TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector &KillOps) { - SmallPtrSet Seens; - SmallVector,8> Refs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->isDebugValue() || UDMI->getParent() != MBB) - continue; - DenseMap::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) - continue; - if (Seens.insert(UDMI)) - Refs.push_back(std::make_pair(UDMI, DI->second)); - } - - if (Refs.empty()) - return; - std::sort(Refs.begin(), Refs.end(), RefSorter()); - - while (!Refs.empty()) { - MachineInstr *LastUDMI = Refs.back().first; - Refs.pop_back(); - - MachineOperand *LastUD = NULL; - for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = LastUDMI->getOperand(i); - if (!MO.isReg() || MO.getReg() != Reg) - continue; - if (!LastUD || (LastUD->isUse() && MO.isDef())) - LastUD = &MO; - if (LastUDMI->isRegTiedToDefOperand(i)) - break; - } - if (LastUD->isDef()) { - // If the instruction has no side effect, delete it and propagate - // backward further. Otherwise, mark is dead and we are done. - if (!isSafeToDelete(*LastUDMI)) { - LastUD->setIsDead(); - break; - } - EraseInstr(LastUDMI); - } else { - LastUD->setIsKill(); - RegKills.set(Reg); - KillOps[Reg] = LastUD; - break; - } - } -} - -/// InsertEmergencySpills - Insert emergency spills before MI if requested by -/// VRM. Return true if spills were inserted. -bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { - if (!VRM->hasEmergencySpills(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - SmallSet UsedSS; - std::vector &EmSpills = VRM->getEmergencySpills(MI); - for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { - unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); - assert(RC && "Unable to determine register class!"); - int SS = VRM->getEmergencySpillSlot(RC); - if (UsedSS.count(SS)) - llvm_unreachable("Need to spill more than one physical registers!"); - UsedSS.insert(SS); - TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(MII); - VRM->addSpillSlotUse(SS, StoreMI); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS, - TII, *MBB->getParent()); - - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI); - - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SS, LoadMI); - ++NumPSpills; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - return true; -} - -/// InsertRestores - Restore registers before MI is requested by VRM. Return -/// true is any instructions were inserted. -bool LocalRewriter::InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps) { - if (!VRM->isRestorePt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector &RestoreRegs = VRM->getRestorePtRestores(MI); - for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) { - unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order. - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. - unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - - // Check if the value being restored if available. If so, it must be - // from a predecessor BB that fallthrough into this BB. 
We do not - // expect: - // BB1: - // r1 = load fi#1 - // ... - // = r1 - // ... # r1 not clobbered - // ... - // = load fi#1 - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - if (InReg == Phys) { - // If the value is already available in the expected register, save - // a reload / remat. - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" instead of reloading into physreg " - << TRI->getName(Phys) << '\n'); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to update - // the kill flags for the current instruction after processing it. - - ++NumOmitted; - continue; - } else if (InReg && InReg != Phys) { - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" by copying it into physreg " - << TRI->getName(Phys) << '\n'); - - // If the reloaded / remat value is available in another register, - // copy it to the desired register. - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), Phys) - .addReg(InReg, RegState::Kill); - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - DEBUG(dbgs() << '\t' << *CopyMI); - ++NumCopified; - continue; - } - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - - if (VRM->isReMaterialized(VirtReg)) { - ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(MII)); - } - return true; -} - -/// InsertSpills - Insert spills after MI if requested by VRM. Return -/// true if spills were inserted. -bool LocalRewriter::InsertSpills(MachineInstr *MI) { - if (!VRM->isSpillPt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector > &SpillRegs = - VRM->getSpillPtSpills(MI); - for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) { - unsigned VirtReg = SpillRegs[i].first; - bool isKill = SpillRegs[i].second; - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. 
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - unsigned Phys = VRM->getPhys(VirtReg); - int StackSlot = VRM->getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot, - RC, TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - } - return true; -} - - -/// ProcessUses - Process all of MI's spilled operands and all available -/// operands. -void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector &KillOps) { - // Clear kill info. - SmallSet KilledMIRegs; - SmallVector VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - - // A partial def causes problems because the same operand both reads and - // writes the register. This rewriter is designed to rewrite uses and defs - // separately, so a partial def would already have been rewritten to a - // physreg by the time we get to processing defs. - // Add an implicit use operand to model the partial def. - if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && - MI.findRegisterUseOperandIdx(VirtReg) == -1) { - VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); - MI.addOperand(MachineOperand::CreateReg(VirtReg, - false, // isDef - true)); // isImplicit - DEBUG(dbgs() << "Partial redef: " << MI); - } - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! - unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. 
- // Note, this is for correctness reason. e.g. - // 8 %reg1024 = IMPLICIT_DEF - // 12 %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. - if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) << ": " << MOk - << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) - << " instead of reloading into " - << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to - // update the kill flags for the current instr after processing it. - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. 
- // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. - ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. - ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. - GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. - PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. - for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! 
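// Illustrative sketch, not part of this change: the dead-store bookkeeping in
// the loop that follows amounts to remembering the last unread store for each
// stack slot and erasing it once a later reuse proves the slot was never
// reloaded. The names below (DeadStoreTracker, Store) are hypothetical
// simplifications of the rewriter's MaybeDeadStores / PotentialDeadStoreSlots
// machinery, shown only to make the idea concrete.
#include <map>

struct Store { bool Erased; };                       // stand-in for a spill-store instruction

class DeadStoreTracker {
  std::map<int, Store *> LastUnreadStore;            // stack slot -> pending store
public:
  void noteStore(int Slot, Store *S) { LastUnreadStore[Slot] = S; }
  void noteLoad(int Slot)            { LastUnreadStore.erase(Slot); } // slot read: store is live
  void noteReuseWithoutReload(int Slot) {            // last use was satisfied from a register
    std::map<int, Store *>::iterator It = LastUnreadStore.find(Slot);
    if (It != LastUnreadStore.end()) {
      It->second->Erased = true;                     // the spill was never needed
      LastUnreadStore.erase(It);
    }
  }
};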
- int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } -} - -/// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avoid reloading vregs. -void -LocalRewriter::RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector &KillOps) { - - DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" - << MBB->getName() << "':\n"); - - MachineFunction &MF = *MBB->getParent(); - - // MaybeDeadStores - When we need to write a value back into a stack slot, - // keep track of the inserted store. If the stack slot value is never read - // (because the value was used from some available register, for example), and - // subsequently stored to, the original store is dead. This map keeps track - // of inserted stores that are not used. If we see a subsequent store to the - // same stack slot, the original store is deleted. - std::vector MaybeDeadStores; - MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL); - - // ReMatDefs - These are rematerializable def MIs which are not deleted. - SmallSet ReMatDefs; - - // Keep track of the registers we have already spilled in case there are - // multiple defs of the same register in MI. - SmallSet SpilledMIRegs; - - RegKills.reset(); - KillOps.clear(); - KillOps.resize(TRI->getNumRegs(), NULL); - - DistanceMap.clear(); - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ) { - MachineBasicBlock::iterator NextMII = llvm::next(MII); - - if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps)) - NextMII = llvm::next(MII); - - if (InsertEmergencySpills(MII)) - NextMII = llvm::next(MII); - - InsertRestores(MII, Spills, RegKills, KillOps); - - if (InsertSpills(MII)) - NextMII = llvm::next(MII); - - bool Erased = false; - bool BackTracked = false; - MachineInstr &MI = *MII; - - // Remember DbgValue's which reference stack slots. - if (MI.isDebugValue() && MI.getOperand(0).isFI()) - Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); - - /// ReusedOperands - Keep track of operand reuse in case we need to undo - /// reuse. - ReuseInfo ReusedOperands(MI, TRI); - - ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); - - DEBUG(dbgs() << '\t' << MI); - - - // If we have folded references to memory operands, make sure we clear all - // physical registers that may contain the value of the spilled virtual - // register - - // Copy the folded virts to a small vector, we may change MI2VirtMap. - SmallVector, 4> FoldedVirts; - // C++0x FTW! - for (std::pair FVRange = - VRM->getFoldedVirts(&MI); - FVRange.first != FVRange.second; ++FVRange.first) - FoldedVirts.push_back(FVRange.first->second); - - SmallSet FoldedSS; - for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { - unsigned VirtReg = FoldedVirts[FVI].first; - VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); - - int SS = VRM->getStackSlot(VirtReg); - if (SS == VirtRegMap::NO_STACK_SLOT) - continue; - FoldedSS.insert(SS); - DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); - - // If this folded instruction is just a use, check to see if it's a - // straight load from the virt reg slot. 
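// Illustrative sketch, not part of this change: the check that follows
// recognizes a folded reload that is really just "DestReg = load <slot>" and,
// when the slot's value is already live in register InReg, turns the load into
// a copy or deletes it outright. The helper and enum below are hypothetical
// and only show the decision, not the real MachineInstr surgery.
struct LoadRewrite {
  enum Kind { Keep, MakeCopy, Erase } Action;
  unsigned CopySrc;                                  // valid only when Action == MakeCopy
};

static LoadRewrite classifyFoldedLoad(unsigned DestReg, unsigned InReg /* 0 if none */) {
  LoadRewrite R = { LoadRewrite::Keep, 0 };
  if (!InReg)
    return R;                                        // value not available: keep the load
  if (DestReg == InReg) {
    R.Action = LoadRewrite::Erase;                   // load is a no-op copy, remove it
  } else {
    R.Action = LoadRewrite::MakeCopy;                // rewrite as DestReg = COPY InReg
    R.CopySrc = InReg;
  }
  return R;
}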
- if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); - if (DestReg && FrameIdx == SS) { - // If this spill slot is available, turn it into a copy (or nothing) - // instead of leaving it as a load! - if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(dbgs() << "Promoted Load To Copy: " << MI); - if (DestReg != InReg) { - MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DestReg, RegState::Define, DefMO->getSubReg()) - .addReg(InReg, RegState::Kill); - // Revisit the copy so we make sure to notice the effects of the - // operation on the destreg (either needing to RA it if it's - // virtual or needing to clobber any values if it's physical). - NextMII = CopyMI; - NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - BackTracked = true; - } else { - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // InvalidateKills resurrects any prior kill of the copy's source - // allowing the source reg to be reused in place of the copy. - Spills.disallowClobberPhysReg(InReg); - } - - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - goto ProcessNextInst; - } - } else { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector NewMIs; - if (PhysReg && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ - MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - goto ProcessNextInst; - } - } - } - - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - MachineInstr* DeadStore = MaybeDeadStores[SS]; - if (DeadStore) { - bool isDead = !(MR & VirtRegMap::isRef); - MachineInstr *NewStore = NULL; - if (MR & VirtRegMap::isModRef) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector NewMIs; - // We can reuse this physreg as long as we are allowed to clobber - // the value and there isn't an earlier def that has already clobbered - // the physreg. - if (PhysReg && - !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg) && - !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! - MachineOperand *KillOpnd = - DeadStore->findRegisterUseOperand(PhysReg, true); - // Note, if the store is storing a sub-register, it's possible the - // super-register is needed below. - if (KillOpnd && !KillOpnd->getSubReg() && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ - MBB->insert(MII, NewMIs[0]); - NewStore = NewMIs[1]; - MBB->insert(MII, NewStore); - VRM->addSpillSlotUse(SS, NewStore); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - isDead = true; - ++NumSUnfold; - } - } - } - - if (isDead) { // Previous store is dead. - // If we get here, the store is dead, nuke it now. - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - if (!NewStore) - ++NumDSE; - } - - MaybeDeadStores[SS] = NULL; - if (NewStore) { - // Treat this store as a spill merged into a copy. That makes the - // stack slot value available. 
- VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod); - goto ProcessNextInst; - } - } - - // If the spill slot value is available, and this is a new definition of - // the value, the value is not available anymore. - if (MR & VirtRegMap::isMod) { - // Notice that the value in this stack slot has been modified. - Spills.ModifyStackSlotOrReMat(SS); - - // If this is *just* a mod of the value, check to see if this is just a - // store to the spill slot (i.e. the spill got merged into the copy). If - // so, realize that the vreg is available now, and add the store to the - // MaybeDeadStore info. - int StackSlot; - if (!(MR & VirtRegMap::isRef)) { - if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Src hasn't been allocated yet?"); - - if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot, - Spills, RegKills, KillOps, TRI)) { - NextMII = llvm::next(MII); - BackTracked = true; - goto ProcessNextInst; - } - - // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark - // this as a potentially dead store in case there is a subsequent - // store into the stack slot without a read from it. - MaybeDeadStores[StackSlot] = &MI; - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register - // available in PhysReg. - Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); - } - } - } - } - - // Process all of the spilled defs. - SpilledMIRegs.clear(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!(MO.isReg() && MO.getReg() && MO.isDef())) - continue; - - unsigned VirtReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - // Also check if it's copying from an "undef", if so, we can't - // eliminate this or else the undef marker is lost and it will - // confuses the scavenger. This is extremely rare. - if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && - MI.getNumOperands() == 2) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - SmallVector KillRegs; - InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); - if (MO.isDead() && !KillRegs.empty()) { - // Source register or an implicit super/sub-register use is killed. - assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); - // Last def is now dead. - TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); - } - EraseInstr(&MI); - Erased = true; - Spills.disallowClobberPhysReg(VirtReg); - goto ProcessNextInst; - } - - // If it's not a no-op copy, it clobbers the value in the destreg. - Spills.ClobberPhysReg(VirtReg); - ReusedOperands.markClobbered(VirtReg); - - // Check to see if this instruction is a load from a stack slot into - // a register. If so, this provides the stack slot value in the reg. - int FrameIdx; - if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { - assert(DestReg == VirtReg && "Unknown load situation!"); - - // If it is a folded reference, then it's not safe to clobber. - bool Folded = FoldedSS.count(FrameIdx); - // Otherwise, if it wasn't available, remember that it is now! 
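// Illustrative sketch, not part of this change: Spills (an AvailableSpills
// instance) behaves roughly like a map from stack slot / re-mat id to the
// physical register currently holding that value, plus a flag saying whether
// that register may be clobbered. The class below is a hypothetical,
// simplified model of that bookkeeping, not the real implementation.
#include <map>

class AvailabilityMap {
  struct Entry { unsigned PhysReg; bool CanClobber; };
  std::map<int, Entry> SlotToReg;
public:
  void addAvailable(int Slot, unsigned Reg, bool CanClobber) {
    Entry E = { Reg, CanClobber };
    SlotToReg[Slot] = E;                              // slot's value now lives in Reg
  }
  unsigned getPhysReg(int Slot) const {               // 0 means "not available"
    std::map<int, Entry>::const_iterator It = SlotToReg.find(Slot);
    return It == SlotToReg.end() ? 0 : It->second.PhysReg;
  }
  void clobber(int Slot) { SlotToReg.erase(Slot); }   // register was overwritten
};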
- Spills.addAvailable(FrameIdx, DestReg, !Folded); - goto ProcessNextInst; - } - - continue; - } - - unsigned SubIdx = MO.getSubReg(); - bool DoReMat = VRM->isReMaterialized(VirtReg); - if (DoReMat) - ReMatDefs.insert(&MI); - - // The only vregs left are stack slot definitions. - int StackSlot = VRM->getStackSlot(VirtReg); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - - // If this def is part of a two-address operand, make sure to execute - // the store from the correct physical register. - unsigned PhysReg; - unsigned TiedOp; - if (MI.isRegTiedToUseOperand(i, &TiedOp)) { - PhysReg = MI.getOperand(TiedOp).getReg(); - if (SubIdx) { - unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); - assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && - "Can't find corresponding super-register!"); - PhysReg = SuperReg; - } - } else { - PhysReg = VRM->getPhys(VirtReg); - if (ReusedOperands.isClobbered(PhysReg)) { - // Another def has taken the assigned physreg. It must have been a - // use&def which got it due to reuse. Undo the reuse! - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - } - } - - // If StackSlot is available in a register that also holds other stack - // slots, clobber those stack slots now. - Spills.ClobberSharingStackSlots(StackSlot); - - assert(PhysReg && "VR not assigned a physical register?"); - MRI->setPhysRegUsed(PhysReg); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - ReusedOperands.markClobbered(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) { - MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; - SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true, - LastStore, Spills, ReMatDefs, RegKills, KillOps); - NextMII = llvm::next(MII); - - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - if (MI.isIdentityCopy()) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - UpdateKills(*LastStore, TRI, RegKills, KillOps); - goto ProcessNextInst; - } - } - } - ProcessNextInst: - // Delete dead instructions without side effects. - if (!Erased && !BackTracked && isSafeToDelete(MI)) { - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - } - if (!Erased) - DistanceMap.insert(std::make_pair(&MI, DistanceMap.size())); - if (!Erased && !BackTracked) { - for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) - UpdateKills(*II, TRI, RegKills, KillOps); - } - MII = NextMII; - } - -} - -llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { - switch (RewriterOpt) { - default: llvm_unreachable("Unreachable!"); - case local: - return new LocalRewriter(); - case trivial: - return new TrivialRewriter(); - } -} diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.h b/contrib/llvm/lib/CodeGen/VirtRegRewriter.h deleted file mode 100644 index 93474e0d7ff7..000000000000 --- a/contrib/llvm/lib/CodeGen/VirtRegRewriter.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H -#define LLVM_CODEGEN_VIRTREGREWRITER_H - -namespace llvm { - class LiveIntervals; - class MachineFunction; - class VirtRegMap; - - /// VirtRegRewriter interface: Implementations of this interface assign - /// spilled virtual registers to stack slots, rewriting the code. - struct VirtRegRewriter { - virtual ~VirtRegRewriter(); - virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) = 0; - }; - - /// createVirtRegRewriter - Create an return a rewriter object, as specified - /// on the command line. - VirtRegRewriter* createVirtRegRewriter(); - -} - -#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp index e1ac398b1011..dccadc4ea4da 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp @@ -165,3 +165,5 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) { return DILineInfo(fileName.c_str(), row.Line, row.Column); } + +void DWARFContextInMemory::anchor() { } diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h index 746a4639f277..d2e763a87a45 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.h +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h @@ -86,6 +86,7 @@ class DWARFContext : public DIContext { /// DWARFContext. It assumes all content is available in memory and stores /// pointers to it. class DWARFContextInMemory : public DWARFContext { + virtual void anchor(); StringRef InfoSection; StringRef AbbrevSection; StringRef ARangeSection; diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp index a11ae3f2908e..6e6c37e30945 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.cpp @@ -83,7 +83,7 @@ void DWARFDebugAbbrev::dump(raw_ostream &OS) const { DWARFAbbreviationDeclarationCollMapConstIter pos; for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) { - OS << format("Abbrev table for offset: 0x%8.8x\n", pos->first); + OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", pos->first); pos->second.dump(OS); } } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h index 03189b132127..c7c0436866c4 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAbbrev.h @@ -25,21 +25,21 @@ typedef DWARFAbbreviationDeclarationColl::const_iterator DWARFAbbreviationDeclarationCollConstIter; class DWARFAbbreviationDeclarationSet { - uint64_t Offset; + uint32_t Offset; uint32_t IdxOffset; std::vector Decls; public: DWARFAbbreviationDeclarationSet() : Offset(0), IdxOffset(0) {} - DWARFAbbreviationDeclarationSet(uint64_t offset, uint32_t idxOffset) + DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset) : Offset(offset), IdxOffset(idxOffset) {} void clear() { IdxOffset = 0; Decls.clear(); } - uint64_t getOffset() const { return Offset; } + uint32_t getOffset() const { return Offset; } void dump(raw_ostream &OS) const; bool extract(DataExtractor data, uint32_t* offset_ptr); diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp index b0c0354383b9..2efbfd1f92fb 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp +++ 
b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -122,8 +122,9 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const { const uint32_t hex_width = Header.AddrSize * 2; for (DescriptorConstIter pos = ArangeDescriptors.begin(), end = ArangeDescriptors.end(); pos != end; ++pos) - OS << format("[0x%*.*llx -", hex_width, hex_width, pos->Address) - << format(" 0x%*.*llx)\n", hex_width, hex_width, pos->getEndAddress()); + OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address) + << format(" 0x%*.*" PRIx64 ")\n", + hex_width, hex_width, pos->getEndAddress()); } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp index 576d37d7813a..178814535612 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp @@ -100,13 +100,14 @@ void DWARFDebugAranges::dump(raw_ostream &OS) const { const uint32_t num_ranges = getNumRanges(); for (uint32_t i = 0; i < num_ranges; ++i) { const Range &range = Aranges[i]; - OS << format("0x%8.8x: [0x%8.8llx - 0x%8.8llx)\n", range.Offset, - (uint64_t)range.LoPC, (uint64_t)range.HiPC()); + OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n", + range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC()); } } void DWARFDebugAranges::Range::dump(raw_ostream &OS) const { - OS << format("{0x%8.8x}: [0x%8.8llx - 0x%8.8llx)\n", Offset, LoPC, HiPC()); + OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n", + Offset, LoPC, HiPC()); } void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc, diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 1b089adbe13b..236db97c44af 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -26,7 +26,7 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, uint32_t offset = Offset; if (debug_info_data.isValidOffset(offset)) { - uint64_t abbrCode = debug_info_data.getULEB128(&offset); + uint32_t abbrCode = debug_info_data.getULEB128(&offset); OS << format("\n0x%8.8x: ", Offset); if (abbrCode) { @@ -203,8 +203,6 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, AbbrevDecl = NULL; return true; // NULL debug tag entry } - - return false; } bool diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h index aff2e8556729..37b3bcdd96e6 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -23,7 +23,7 @@ class DWARFFormValue; /// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. class DWARFDebugInfoEntryMinimal { /// Offset within the .debug_info of the start of this entry. - uint64_t Offset; + uint32_t Offset; /// How many to subtract from "this" to get the parent. /// If zero this die has no parent. @@ -52,7 +52,7 @@ class DWARFDebugInfoEntryMinimal { uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } - uint64_t getOffset() const { return Offset; } + uint32_t getOffset() const { return Offset; } uint32_t getNumAttributes() const { return !isNULL() ? 
AbbrevDecl->getNumAttributes() : 0; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp index fe1ef78b026e..117fa31aa86f 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp @@ -41,8 +41,9 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { "----------------\n"; for (uint32_t i = 0; i < FileNames.size(); ++i) { const FileNameEntry& fileEntry = FileNames[i]; - OS << format("file_names[%3u] %4u ", i+1, fileEntry.DirIdx) - << format("0x%8.8x 0x%8.8x ", fileEntry.ModTime, fileEntry.Length) + OS << format("file_names[%3u] %4" PRIu64 " ", i+1, fileEntry.DirIdx) + << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", + fileEntry.ModTime, fileEntry.Length) << fileEntry.Name << '\n'; } } @@ -68,7 +69,7 @@ void DWARFDebugLine::Row::reset(bool default_is_stmt) { } void DWARFDebugLine::Row::dump(raw_ostream &OS) const { - OS << format("0x%16.16llx %6u %6u", Address, Line, Column) + OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column) << format(" %6u %3u ", File, Isa) << (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "") diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp index 705efe5549b6..ee2a3ab7b789 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp @@ -263,12 +263,12 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { bool cu_relative_offset = false; switch (Form) { - case DW_FORM_addr: OS << format("0x%016x", uvalue); break; + case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_flag: - case DW_FORM_data1: OS << format("0x%02x", uvalue); break; - case DW_FORM_data2: OS << format("0x%04x", uvalue); break; - case DW_FORM_data4: OS << format("0x%08x", uvalue); break; - case DW_FORM_data8: OS << format("0x%016x", uvalue); break; + case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; + case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break; + case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break; + case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_string: OS << '"'; OS.write_escaped(getAsCString(NULL)); @@ -280,7 +280,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { case DW_FORM_block4: if (uvalue > 0) { switch (Form) { - case DW_FORM_block: OS << format("<0x%llx> ", uvalue); break; + case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break; case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break; case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break; @@ -314,7 +314,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { break; } case DW_FORM_ref_addr: - OS << format("0x%016x", uvalue); + OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_ref1: cu_relative_offset = true; @@ -330,11 +330,11 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { break; case DW_FORM_ref8: cu_relative_offset = true; - OS << format("cu + 0x%8.8llx", uvalue); + OS << format("cu + 0x%8.8" PRIx64, uvalue); break; case DW_FORM_ref_udata: cu_relative_offset = true; - OS << format("cu + 0x%llx", uvalue); + OS << format("cu + 0x%" PRIx64, uvalue); break; // All DW_FORM_indirect attributes should be resolved prior to calling @@ -348,7 +348,7 @@ 
DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { } if (cu_relative_offset) - OS << format(" => {0x%8.8x}", (uvalue + (cu ? cu->getOffset() : 0))); + OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0)); } const char* diff --git a/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h b/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h new file mode 100644 index 000000000000..1c07c947142f --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/EventListenerCommon.h @@ -0,0 +1,67 @@ +//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common functionality for JITEventListener implementations +// +//===----------------------------------------------------------------------===// + +#ifndef EVENT_LISTENER_COMMON_H +#define EVENT_LISTENER_COMMON_H + +#include "llvm/Metadata.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace jitprofiling { + +class FilenameCache { + // Holds the filename of each Scope, so that we can pass a null-terminated + // string into oprofile. Use an AssertingVH rather than a ValueMap because we + // shouldn't be modifying any MDNodes while this map is alive. + DenseMap, std::string> Filenames; + DenseMap, std::string> Paths; + + public: + const char *getFilename(MDNode *Scope) { + std::string &Filename = Filenames[Scope]; + if (Filename.empty()) { + DIScope DIScope(Scope); + Filename = DIScope.getFilename(); + } + return Filename.c_str(); + } + + const char *getFullPath(MDNode *Scope) { + std::string &P = Paths[Scope]; + if (P.empty()) { + DIScope DIScope(Scope); + StringRef DirName = DIScope.getDirectory(); + StringRef FileName = DIScope.getFilename(); + SmallString<256> FullPath; + if (DirName != "." && DirName != "") { + FullPath = DirName; + } + if (FileName != "") { + sys::path::append(FullPath, FileName); + } + P = FullPath.str(); + } + return P.c_str(); + } +}; + +} // namespace jitprofiling + +} // namespace llvm + +#endif //EVENT_LISTENER_COMMON_H diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index 525877b68900..a744d0c1e798 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include @@ -41,14 +42,12 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -308,13 +307,12 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module, // Should be an array of '{ i32, void ()* }' structs. The first value is // the init priority, which we ignore. 
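// Illustrative sketch, not part of this change: the hunk that follows swaps
// the isa<> + cast<> pair for a single dyn_cast<>, which returns null instead
// of asserting when the initializer is not the expected type. A minimal
// example of the same idiom on llvm.global_ctors-style entries; the
// visitCtorList name is hypothetical.
#include "llvm/Constants.h"
using namespace llvm;

static void visitCtorList(Constant *Init) {
  ConstantArray *InitList = dyn_cast<ConstantArray>(Init);   // null if not an array
  if (InitList == 0)
    return;
  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
    // Each element should be a '{ i32 priority, void ()* ctor }' struct.
    ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
    if (CS == 0)
      continue;
    (void)CS->getOperand(1);                                 // the constructor function
  }
}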
- if (isa(GV->getInitializer())) + ConstantArray *InitList = dyn_cast(GV->getInitializer()); + if (InitList == 0) return; - ConstantArray *InitList = cast(GV->getInitializer()); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (isa(InitList->getOperand(i))) - continue; - ConstantStruct *CS = cast(InitList->getOperand(i)); + ConstantStruct *CS = dyn_cast(InitList->getOperand(i)); + if (CS == 0) continue; Constant *FP = CS->getOperand(1); if (FP->isNullValue()) @@ -404,14 +402,15 @@ ExecutionEngine *ExecutionEngine::create(Module *M, std::string *ErrorStr, CodeGenOpt::Level OptLevel, bool GVsWithCode) { - return EngineBuilder(M) + EngineBuilder EB = EngineBuilder(M) .setEngineKind(ForceInterpreter ? EngineKind::Interpreter : EngineKind::JIT) .setErrorStr(ErrorStr) .setOptLevel(OptLevel) - .setAllocateGVsWithCode(GVsWithCode) - .create(); + .setAllocateGVsWithCode(GVsWithCode); + + return EB.create(); } /// createJIT - This is the factory method for creating a JIT for the current @@ -420,7 +419,7 @@ ExecutionEngine *ExecutionEngine::create(Module *M, ExecutionEngine *ExecutionEngine::createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, + CodeGenOpt::Level OL, bool GVsWithCode, Reloc::Model RM, CodeModel::Model CMM) { @@ -432,18 +431,25 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, // Use the defaults for extra parameters. Users can use EngineBuilder to // set them. - StringRef MArch = ""; - StringRef MCPU = ""; - SmallVector MAttrs; + EngineBuilder EB(M); + EB.setEngineKind(EngineKind::JIT); + EB.setErrorStr(ErrorStr); + EB.setRelocationModel(RM); + EB.setCodeModel(CMM); + EB.setAllocateGVsWithCode(GVsWithCode); + EB.setOptLevel(OL); + EB.setJITMemoryManager(JMM); - TargetMachine *TM = - EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr); + // TODO: permit custom TargetOptions here + TargetMachine *TM = EB.selectTarget(); if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0; - return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM); + return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM); } -ExecutionEngine *EngineBuilder::create() { +ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { + OwningPtr TheTM(TM); // Take ownership. + // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) @@ -464,21 +470,24 @@ ExecutionEngine *EngineBuilder::create() { // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. - if (WhichEngine & EngineKind::JIT) { - if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, - RelocModel, CMModel, - ErrorStr)) { - if (UseMCJIT && ExecutionEngine::MCJITCtor) { - ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel, - AllocateGVsWithCode, TM); - if (EE) return EE; - } else if (ExecutionEngine::JITCtor) { - ExecutionEngine *EE = - ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, - AllocateGVsWithCode, TM); - if (EE) return EE; - } + if ((WhichEngine & EngineKind::JIT) && TheTM) { + Triple TT(M->getTargetTriple()); + if (!TM->getTarget().hasJIT()) { + errs() << "WARNING: This target JIT is not designed for the host" + << " you are running. 
If bad things happen, please choose" + << " a different -march switch.\n"; + } + + if (UseMCJIT && ExecutionEngine::MCJITCtor) { + ExecutionEngine *EE = + ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, + AllocateGVsWithCode, TheTM.take()); + if (EE) return EE; + } else if (ExecutionEngine::JITCtor) { + ExecutionEngine *EE = + ExecutionEngine::JITCtor(M, ErrorStr, JMM, + AllocateGVsWithCode, TheTM.take()); + if (EE) return EE; } } @@ -944,30 +953,47 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { DEBUG(dbgs() << "JIT: Initializing " << Addr << " "); DEBUG(Init->dump()); - if (isa(Init)) { + if (isa(Init)) return; - } else if (const ConstantVector *CP = dyn_cast(Init)) { + + if (const ConstantVector *CP = dyn_cast(Init)) { unsigned ElementSize = getTargetData()->getTypeAllocSize(CP->getType()->getElementType()); for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize); return; - } else if (isa(Init)) { + } + + if (isa(Init)) { memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType())); return; - } else if (const ConstantArray *CPA = dyn_cast(Init)) { + } + + if (const ConstantArray *CPA = dyn_cast(Init)) { unsigned ElementSize = getTargetData()->getTypeAllocSize(CPA->getType()->getElementType()); for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize); return; - } else if (const ConstantStruct *CPS = dyn_cast(Init)) { + } + + if (const ConstantStruct *CPS = dyn_cast(Init)) { const StructLayout *SL = getTargetData()->getStructLayout(cast(CPS->getType())); for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i)); return; - } else if (Init->getType()->isFirstClassType()) { + } + + if (const ConstantDataSequential *CDS = + dyn_cast(Init)) { + // CDS is already laid out in host memory order. 
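// Illustrative sketch, not part of this change: ConstantDataSequential, new in
// LLVM 3.1, stores homogeneous array/vector constants as one flat blob in host
// memory order, which is why the initializer below can be copied with a single
// memcpy. A hypothetical example of producing such a constant:
#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/ArrayRef.h"
#include <stdint.h>
using namespace llvm;

static Constant *makeByteArray(LLVMContext &Ctx) {
  static const uint8_t Bytes[] = { 1, 2, 3, 4 };     // raw element data
  return ConstantDataArray::get(Ctx, ArrayRef<uint8_t>(Bytes, 4));
}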
+ StringRef Data = CDS->getRawDataValues(); + memcpy(Addr, Data.data(), Data.size()); + return; + } + + if (Init->getType()->isFirstClassType()) { GenericValue Val = getConstantValue(Init); StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType()); return; @@ -1123,6 +1149,6 @@ void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES, void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *, const GlobalValue *, const GlobalValue *) { - assert(false && "The ExecutionEngine doesn't know how to handle a" - " RAUW on a value it has a global mapping for."); + llvm_unreachable("The ExecutionEngine doesn't know how to handle a" + " RAUW on a value it has a global mapping for."); } diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp index f8f1f4a78ee5..75e680ab3612 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -76,9 +76,7 @@ double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) { return unwrap(GenVal)->DoubleVal; default: llvm_unreachable("LLVMGenericValueToFloat supports only float and double."); - break; } - return 0; // Not reached } void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp new file mode 100644 index 000000000000..5dfa78f34a33 --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -0,0 +1,183 @@ +//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITEventListener object to tell Intel(R) VTune(TM) +// Amplifier XE 2011 about JITted functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/JITEventListener.h" + +#define DEBUG_TYPE "amplifier-jit-event-listener" +#include "llvm/Function.h" +#include "llvm/Metadata.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Errno.h" +#include "llvm/Support/ValueHandle.h" +#include "EventListenerCommon.h" + +using namespace llvm; +using namespace llvm::jitprofiling; + +namespace { + +class IntelJITEventListener : public JITEventListener { + typedef DenseMap MethodIDMap; + + IntelJITEventsWrapper& Wrapper; + MethodIDMap MethodIDs; + FilenameCache Filenames; + +public: + IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper) + : Wrapper(libraryWrapper) { + } + + ~IntelJITEventListener() { + } + + virtual void NotifyFunctionEmitted(const Function &F, + void *FnStart, size_t FnSize, + const EmittedFunctionDetails &Details); + + virtual void NotifyFreeingMachineCode(void *OldPtr); +}; + +static LineNumberInfo LineStartToIntelJITFormat( + uintptr_t StartAddress, + uintptr_t Address, + DebugLoc Loc) { + LineNumberInfo Result; + + Result.Offset = Address - StartAddress; + Result.LineNumber = Loc.getLine(); + + return Result; +} + +static iJIT_Method_Load FunctionDescToIntelJITFormat( + IntelJITEventsWrapper& Wrapper, + const char* FnName, + uintptr_t FnStart, + size_t FnSize) { + iJIT_Method_Load Result; + memset(&Result, 0, sizeof(iJIT_Method_Load)); + + Result.method_id = Wrapper.iJIT_GetNewMethodID(); + Result.method_name = const_cast(FnName); + Result.method_load_address = reinterpret_cast(FnStart); + Result.method_size = FnSize; + + Result.class_id = 0; + Result.class_file_name = NULL; + Result.user_data = NULL; + Result.user_data_size = 0; + Result.env = iJDE_JittingAPI; + + return Result; +} + +// Adds the just-emitted function to the symbol table. +void IntelJITEventListener::NotifyFunctionEmitted( + const Function &F, void *FnStart, size_t FnSize, + const EmittedFunctionDetails &Details) { + iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper, + F.getName().data(), + reinterpret_cast(FnStart), + FnSize); + + std::vector LineInfo; + + if (!Details.LineStarts.empty()) { + // Now convert the line number information from the address/DebugLoc + // format in Details to the offset/lineno in Intel JIT API format. 
+ + LineInfo.reserve(Details.LineStarts.size() + 1); + + DebugLoc FirstLoc = Details.LineStarts[0].Loc; + assert(!FirstLoc.isUnknown() + && "LineStarts should not contain unknown DebugLocs"); + + MDNode *FirstLocScope = FirstLoc.getScope(F.getContext()); + DISubprogram FunctionDI = getDISubprogram(FirstLocScope); + if (FunctionDI.Verify()) { + FunctionMessage.source_file_name = const_cast( + Filenames.getFullPath(FirstLocScope)); + + LineNumberInfo FirstLine; + FirstLine.Offset = 0; + FirstLine.LineNumber = FunctionDI.getLineNumber(); + LineInfo.push_back(FirstLine); + } + + for (std::vector::const_iterator I = + Details.LineStarts.begin(), E = Details.LineStarts.end(); + I != E; ++I) { + // This implementation ignores the DebugLoc filename because the Intel + // JIT API does not support multiple source files associated with a single + // JIT function + LineInfo.push_back(LineStartToIntelJITFormat( + reinterpret_cast(FnStart), + I->Address, + I->Loc)); + + // If we have no file name yet for the function, use the filename from + // the first instruction that has one + if (FunctionMessage.source_file_name == 0) { + MDNode *scope = I->Loc.getScope( + Details.MF->getFunction()->getContext()); + FunctionMessage.source_file_name = const_cast( + Filenames.getFullPath(scope)); + } + } + + FunctionMessage.line_number_size = LineInfo.size(); + FunctionMessage.line_number_table = &*LineInfo.begin(); + } else { + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } + + Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); + MethodIDs[FnStart] = FunctionMessage.method_id; +} + +void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { + MethodIDMap::iterator I = MethodIDs.find(FnStart); + if (I != MethodIDs.end()) { + Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); + MethodIDs.erase(I); + } +} + +} // anonymous namespace. + +namespace llvm { +JITEventListener *JITEventListener::createIntelJITEventListener() { + static OwningPtr JITProfilingWrapper( + new IntelJITEventsWrapper); + return new IntelJITEventListener(*JITProfilingWrapper); +} + +// for testing +JITEventListener *JITEventListener::createIntelJITEventListener( + IntelJITEventsWrapper* TestImpl) { + return new IntelJITEventListener(*TestImpl); +} + +} // namespace llvm + diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 27917da07a2c..af47be9c5b56 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -625,24 +625,6 @@ void Interpreter::visitReturnInst(ReturnInst &I) { popStackAndReturnValueToCaller(RetTy, Result); } -void Interpreter::visitUnwindInst(UnwindInst &I) { - // Unwind stack - Instruction *Inst; - do { - ECStack.pop_back(); - if (ECStack.empty()) - report_fatal_error("Empty stack during unwind!"); - Inst = ECStack.back().Caller.getInstruction(); - } while (!(Inst && isa(Inst))); - - // Return from invoke - ExecutionContext &InvokingSF = ECStack.back(); - InvokingSF.Caller = CallSite(); - - // Go to exceptional destination BB of invoke instruction - SwitchToNewBasicBlock(cast(Inst)->getUnwindDest(), InvokingSF); -} - void Interpreter::visitUnreachableInst(UnreachableInst &I) { report_fatal_error("Program executed an 'unreachable' instruction!"); } @@ -668,12 +650,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) { // Check to see if any of the cases match... 
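// Illustrative sketch, not part of this change: LLVM 3.1 replaces index-based
// switch-case access (where index 0 used to be the default case) with a
// dedicated case iterator, which is what the hunk below adopts. A minimal
// example of the new idiom; findCase is a hypothetical helper.
#include "llvm/Instructions.h"
using namespace llvm;

static BasicBlock *findCase(SwitchInst &SI, ConstantInt *Val) {
  for (SwitchInst::CaseIt I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
    if (I.getCaseValue() == Val)       // ConstantInts are uniqued, pointer compare is fine
      return I.getCaseSuccessor();     // the default case is not part of this range
  return SI.getDefaultDest();
}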
BasicBlock *Dest = 0; - unsigned NumCases = I.getNumCases(); - // Skip the first item since that's the default case. - for (unsigned i = 1; i < NumCases; ++i) { - GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF); + for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) { + GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF); if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) { - Dest = cast(I.getSuccessor(i)); + Dest = cast(i.getCaseSuccessor()); break; } } @@ -1253,8 +1233,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, break; default: dbgs() << "Unhandled ConstantExpr: " << *CE << "\n"; - llvm_unreachable(0); - return GenericValue(); + llvm_unreachable("Unhandled ConstantExpr"); } return Dest; } diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 055875c9456a..7a206ebf73d7 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -94,15 +94,16 @@ static ExFunc lookupFunction(const Function *F) { FunctionType *FT = F->getFunctionType(); for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) ExtName += getTypeID(FT->getContainedType(i)); - ExtName + "_" + F->getNameStr(); + ExtName += "_" + F->getName().str(); sys::ScopedLock Writer(*FunctionsLock); ExFunc FnPtr = FuncNames[ExtName]; if (FnPtr == 0) - FnPtr = FuncNames["lle_X_" + F->getNameStr()]; + FnPtr = FuncNames["lle_X_" + F->getName().str()]; if (FnPtr == 0) // Try calling a generic function... if it exists... FnPtr = (ExFunc)(intptr_t) - sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr()); + sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" + + F->getName().str()); if (FnPtr != 0) ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later return FnPtr; @@ -296,14 +297,8 @@ GenericValue Interpreter::callExternalFunction(Function *F, // Functions "exported" to the running application... // -// Visual Studio warns about returning GenericValue in extern "C" linkage -#ifdef _MSC_VER - #pragma warning(disable : 4190) -#endif - -extern "C" { // Don't add C++ manglings to llvm mangling :) - // void atexit(Function*) +static GenericValue lle_X_atexit(FunctionType *FT, const std::vector &Args) { assert(Args.size() == 1); @@ -314,6 +309,7 @@ GenericValue lle_X_atexit(FunctionType *FT, } // void exit(int) +static GenericValue lle_X_exit(FunctionType *FT, const std::vector &Args) { TheInterpreter->exitCalled(Args[0]); @@ -321,6 +317,7 @@ GenericValue lle_X_exit(FunctionType *FT, } // void abort(void) +static GenericValue lle_X_abort(FunctionType *FT, const std::vector &Args) { //FIXME: should we report or raise here? @@ -331,6 +328,7 @@ GenericValue lle_X_abort(FunctionType *FT, // int sprintf(char *, const char *, ...) - a very rough implementation to make // output useful. +static GenericValue lle_X_sprintf(FunctionType *FT, const std::vector &Args) { char *OutputBuffer = (char *)GVTOP(Args[0]); @@ -408,11 +406,11 @@ GenericValue lle_X_sprintf(FunctionType *FT, break; } } - return GV; } // int printf(const char *, ...) - a very rough implementation to make output // useful. 
+static GenericValue lle_X_printf(FunctionType *FT, const std::vector &Args) { char Buffer[10000]; @@ -425,6 +423,7 @@ GenericValue lle_X_printf(FunctionType *FT, } // int sscanf(const char *format, ...); +static GenericValue lle_X_sscanf(FunctionType *FT, const std::vector &args) { assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!"); @@ -440,6 +439,7 @@ GenericValue lle_X_sscanf(FunctionType *FT, } // int scanf(const char *format, ...); +static GenericValue lle_X_scanf(FunctionType *FT, const std::vector &args) { assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!"); @@ -456,6 +456,7 @@ GenericValue lle_X_scanf(FunctionType *FT, // int fprintf(FILE *, const char *, ...) - a very rough implementation to make // output useful. +static GenericValue lle_X_fprintf(FunctionType *FT, const std::vector &Args) { assert(Args.size() >= 2); @@ -469,14 +470,6 @@ GenericValue lle_X_fprintf(FunctionType *FT, return GV; } -} // End extern "C" - -// Done with externals; turn the warning back on -#ifdef _MSC_VER - #pragma warning(default: 4190) -#endif - - void Interpreter::initializeExternalFunctions() { sys::ScopedLock Writer(*FunctionsLock); FuncNames["lle_X_atexit"] = lle_X_atexit; diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h index ee2b4596f38f..28c5775ab468 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -115,6 +115,12 @@ class Interpreter : public ExecutionEngine, public InstVisitor { virtual GenericValue runFunction(Function *F, const std::vector &ArgValues); + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) { + // FIXME: not implemented. + return 0; + } + /// recompileAndRelinkFunction - For the interpreter, functions are always /// up-to-date. /// @@ -165,7 +171,6 @@ class Interpreter : public ExecutionEngine, public InstVisitor { void visitCallSite(CallSite CS); void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); } void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); } - void visitUnwindInst(UnwindInst &I); void visitUnreachableInst(UnreachableInst &I); void visitShl(BinaryOperator &I); diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp deleted file mode 100644 index 2251a8e6b077..000000000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===-- Intercept.cpp - System function interception routines -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// If a function call occurs to an external function, the JIT is designed to use -// the dynamic loader interface to find a function to call. This is useful for -// calling system calls and library functions that are not available in LLVM. -// Some system calls, however, need to be handled specially. For this reason, -// we intercept some of them here and use our own stubs to handle them. 
-// -//===----------------------------------------------------------------------===// - -#include "JIT.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" -using namespace llvm; - -// AtExitHandlers - List of functions to call when the program exits, -// registered with the atexit() library function. -static std::vector AtExitHandlers; - -/// runAtExitHandlers - Run any functions registered by the program's -/// calls to atexit(3), which we intercept and store in -/// AtExitHandlers. -/// -static void runAtExitHandlers() { - while (!AtExitHandlers.empty()) { - void (*Fn)() = AtExitHandlers.back(); - AtExitHandlers.pop_back(); - Fn(); - } -} - -//===----------------------------------------------------------------------===// -// Function stubs that are invoked instead of certain library calls -//===----------------------------------------------------------------------===// - -// Force the following functions to be linked in to anything that uses the -// JIT. This is a hack designed to work around the all-too-clever Glibc -// strategy of making these functions work differently when inlined vs. when -// not inlined, and hiding their real definitions in a separate archive file -// that the dynamic linker can't see. For more info, search for -// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. -#if defined(__linux__) -#if defined(HAVE_SYS_STAT_H) -#include -#endif -#include -#include -/* stat functions are redirecting to __xstat with a version number. On x86-64 - * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' - * available as an exported symbol, so we have to add it explicitly. - */ -namespace { -class StatSymbols { -public: - StatSymbols() { - sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); - sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); - sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); - sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); - sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); - sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); - sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); - sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); - sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); - } -}; -} -static StatSymbols initStatSymbols; -#endif // __linux__ - -// jit_exit - Used to intercept the "exit" library call. -static void jit_exit(int Status) { - runAtExitHandlers(); // Run atexit handlers... - exit(Status); -} - -// jit_atexit - Used to intercept the "atexit" library call. -static int jit_atexit(void (*Fn)()) { - AtExitHandlers.push_back(Fn); // Take note of atexit handler... - return 0; // Always successful -} - -static int jit_noop() { - return 0; -} - -//===----------------------------------------------------------------------===// -// -/// getPointerToNamedFunction - This method returns the address of the specified -/// function by using the dynamic loader interface. As such it is only useful -/// for resolving library symbols, not code generated symbols. -/// -void *JIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { - if (!isSymbolSearchingDisabled()) { - // Check to see if this is one of the functions we want to intercept. 
Note, - // we cast to intptr_t here to silence a -pedantic warning that complains - // about casting a function pointer to a normal pointer. - if (Name == "exit") return (void*)(intptr_t)&jit_exit; - if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - // If this is an asm specifier, skip the sentinal. - if (NameStr[0] == 1) ++NameStr; - - // If it's an external function, look it up in the process image... - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // and has an asm specifier, try again without the underscore. - if (Name[0] == 1 && NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These - // are references to hidden visibility symbols that dlsym cannot resolve. - // If we have one of these, strip off $LDBLStub and try again. -#if defined(__APPLE__) && defined(__ppc__) - if (Name.size() > 9 && Name[Name.size()-9] == '$' && - memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { - // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. - // This mirrors logic in libSystemStubs.a. - std::string Prefix = std::string(Name.begin(), Name.end()-9); - if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) - return Ptr; - if (void *Ptr = getPointerToNamedFunction(Prefix, false)) - return Ptr; - } -#endif - } - - /// If a LazyFunctionCreator is installed, use it to get/create the function. - if (LazyFunctionCreator) - if (void *RP = LazyFunctionCreator(Name)) - return RP; - - if (AbortOnFailure) { - report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); - } - return 0; -} diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp index d773009065b5..a942299f3bbd 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetJITInfo.h" @@ -206,7 +207,6 @@ void DarwinRegisterFrame(void* FrameBegin) { ExecutionEngine *JIT::createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -216,7 +216,7 @@ ExecutionEngine *JIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. 
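// Illustrative sketch, not part of this change: with the explicit OptLevel
// parameter dropped from the JIT construction path, clients now pick the
// optimization level and memory manager up front through EngineBuilder,
// roughly as below (error handling elided; makeJIT, M and Err are
// hypothetical).
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include <string>
using namespace llvm;

static ExecutionEngine *makeJIT(Module *M, std::string *Err) {
  return EngineBuilder(M)
      .setEngineKind(EngineKind::JIT)
      .setErrorStr(Err)
      .setOptLevel(CodeGenOpt::Default)
      .setJITMemoryManager(JITMemoryManager::CreateDefaultMemManager())
      .create();
}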
if (TargetJITInfo *TJ = TM->getJITInfo()) { - return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode); + return new JIT(M, *TM, *TJ, JMM, GVsWithCode); } else { if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; @@ -268,9 +268,10 @@ extern "C" { } JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode) - : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode), - isAlreadyCodeGenerating(false) { + JITMemoryManager *jmm, bool GVsWithCode) + : ExecutionEngine(M), TM(tm), TJI(tji), + JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()), + AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { setTargetData(TM.getTargetData()); jitstate = new JITState(M); @@ -288,7 +289,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory that // may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE)) { report_fatal_error("Target does not support machine code emission!"); } @@ -323,6 +324,7 @@ JIT::~JIT() { AllJits->Remove(this); delete jitstate; delete JCE; + // JMM is a ownership of JCE, so we no need delete JMM here. delete &TM; } @@ -341,7 +343,7 @@ void JIT::addModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE)) { report_fatal_error("Target does not support machine code emission!"); } @@ -372,7 +374,7 @@ bool JIT::removeModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE)) { report_fatal_error("Target does not support machine code emission!"); } @@ -476,7 +478,6 @@ GenericValue JIT::runFunction(Function *F, case Type::FP128TyID: case Type::PPC_FP128TyID: llvm_unreachable("long double not supported yet"); - return rv; case Type::PointerTyID: return PTOGV(((void*(*)())(intptr_t)FPtr)()); } @@ -708,12 +709,32 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { if (I != getBasicBlockAddressMap(locked).end()) { return I->second; } else { - assert(0 && "JIT does not have BB address for address-of-label, was" - " it eliminated by optimizer?"); - return 0; + llvm_unreachable("JIT does not have BB address for address-of-label, was" + " it eliminated by optimizer?"); } } +void *JIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure){ + if (!isSymbolSearchingDisabled()) { + void *ptr = JMM->getPointerToNamedFunction(Name, false); + if (ptr) + return ptr; + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} + + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. 
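The hunks above move external-symbol resolution out of the per-engine Intercept.cpp files and into the JITMemoryManager: JIT::getPointerToNamedFunction now asks the memory manager first, falls back to any installed LazyFunctionCreator, and only then calls report_fatal_error. A minimal sketch of how a client can hook that fallback path is below; the helper name and the symbol it maps are hypothetical and not part of this change, and the ExecutionEngine is assumed to have been created elsewhere (e.g. via EngineBuilder).

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <stdint.h>
#include <cstdio>
#include <string>

// Hypothetical host-side helper we want JITed code to be able to reach.
static void my_runtime_helper() { std::puts("called from JITed code"); }

// Resolver consulted only after the JITMemoryManager's own lookup fails.
static void *resolveMissingSymbol(const std::string &Name) {
  if (Name == "my_runtime_helper")
    return (void *)(intptr_t)&my_runtime_helper;  // same cast idiom as above
  return 0;  // returning null lets the engine report the unresolved symbol
}

// Given an ExecutionEngine *EE built elsewhere:
//   EE->InstallLazyFunctionCreator(resolveMissingSymbol);
// Any symbol the memory manager cannot resolve is then offered to
// resolveMissingSymbol before the JIT aborts with a fatal error.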
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h index 92dcb0e99586..2ae155bebf40 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.h @@ -58,6 +58,7 @@ class JIT : public ExecutionEngine { TargetMachine &TM; // The current target we are compiling to TargetJITInfo &TJI; // The JITInfo for the target we are compiling to JITCodeEmitter *JCE; // JCE object + JITMemoryManager *JMM; std::vector EventListeners; /// AllocateGVsWithCode - Some applications require that global variables and @@ -78,8 +79,7 @@ class JIT : public ExecutionEngine { JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, - bool AllocateGVsWithCode); + JITMemoryManager *JMM, bool AllocateGVsWithCode); public: ~JIT(); @@ -118,15 +118,15 @@ class JIT : public ExecutionEngine { const std::vector &ArgValues); /// getPointerToNamedFunction - This method returns the address of the - /// specified function by using the dlsym function call. As such it is only + /// specified function by using the MemoryManager. As such it is only /// useful for resolving library symbols, not code generated symbols. /// /// If AbortOnFailure is false and no function with the given name is /// found, this function silently returns a null pointer. Otherwise, /// it prints a message to stderr and aborts. /// - void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true); + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); // CompilationCallback - Invoked the first time that a call site is found, // which causes lazy compilation of the target function. @@ -185,7 +185,6 @@ class JIT : public ExecutionEngine { static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM); diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp deleted file mode 100644 index e71c20b89fda..000000000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ /dev/null @@ -1,211 +0,0 @@ -//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a JITDebugRegisterer object that is used by the JIT to -// register debug info with debuggers like GDB. -// -//===----------------------------------------------------------------------===// - -#include "JITDebugRegisterer.h" -#include "../../CodeGen/ELF.h" -#include "../../CodeGen/ELFWriter.h" -#include "llvm/LLVMContext.h" -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/MutexGuard.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Mutex.h" -#include - -namespace llvm { - -// This must be kept in sync with gdb/gdb/jit.h . -extern "C" { - - // Debuggers puts a breakpoint in this function. 
- LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { } - - // We put information about the JITed function in this global, which the - // debugger reads. Make sure to specify the version statically, because the - // debugger checks the version before we can set it during runtime. - struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; - -} - -namespace { - - /// JITDebugLock - Used to serialize all code registration events, since they - /// modify global variables. - sys::Mutex JITDebugLock; - -} - -JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { } - -JITDebugRegisterer::~JITDebugRegisterer() { - // Free all ELF memory. - for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end(); - I != E; ++I) { - // Call the private method that doesn't update the map so our iterator - // doesn't break. - UnregisterFunctionInternal(I); - } - FnMap.clear(); -} - -std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { - // Stack allocate an empty module with an empty LLVMContext for the ELFWriter - // API. We don't use the real module because then the ELFWriter would write - // out unnecessary GlobalValues during finalization. - LLVMContext Context; - Module M("", Context); - - // Make a buffer for the ELF in memory. - std::string Buffer; - raw_string_ostream O(Buffer); - ELFWriter EW(O, TM); - EW.doInitialization(M); - - // Copy the binary into the .text section. This isn't necessary, but it's - // useful to be able to disassemble the ELF by hand. - ELFSection &Text = EW.getTextSection(const_cast(F)); - Text.Addr = (uint64_t)I.FnStart; - // TODO: We could eliminate this copy if we somehow used a pointer/size pair - // instead of a vector. - Text.getData().assign(I.FnStart, I.FnEnd); - - // Copy the exception handling call frame information into the .eh_frame - // section. This allows GDB to get a good stack trace, particularly on - // linux x86_64. Mark this as a PROGBITS section that needs to be loaded - // into memory at runtime. - ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - // Pointers in the DWARF EH info are all relative to the EH frame start, - // which is stored here. - EH.Addr = (uint64_t)I.EhStart; - // TODO: We could eliminate this copy if we somehow used a pointer/size pair - // instead of a vector. - EH.getData().assign(I.EhStart, I.EhEnd); - - // Add this single function to the symbol table, so the debugger prints the - // name instead of '???'. We give the symbol default global visibility. - ELFSym *FnSym = ELFSym::getGV(F, - ELF::STB_GLOBAL, - ELF::STT_FUNC, - ELF::STV_DEFAULT); - FnSym->SectionIdx = Text.SectionIdx; - FnSym->Size = I.FnEnd - I.FnStart; - FnSym->Value = 0; // Offset from start of section. - EW.SymbolList.push_back(FnSym); - - EW.doFinalization(M); - O.flush(); - - // When trying to debug why GDB isn't getting the debug info right, it's - // awfully helpful to write the object file to disk so that it can be - // inspected with readelf and objdump. - if (JITEmitDebugInfoToDisk) { - std::string Filename; - raw_string_ostream O2(Filename); - O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o"; - O2.flush(); - std::string Errors; - raw_fd_ostream O3(Filename.c_str(), Errors); - O3 << Buffer; - O3.close(); - } - - return Buffer; -} - -void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) { - // TODO: Support non-ELF platforms. 
- if (!TM.getELFWriterInfo()) - return; - - std::string Buffer = MakeELF(F, I); - - jit_code_entry *JITCodeEntry = new jit_code_entry(); - JITCodeEntry->symfile_addr = Buffer.c_str(); - JITCodeEntry->symfile_size = Buffer.size(); - - // Add a mapping from F to the entry and buffer, so we can delete this - // info later. - FnMap[F] = std::make_pair(Buffer, JITCodeEntry); - - // Acquire the lock and do the registration. - { - MutexGuard locked(JITDebugLock); - __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; - - // Insert this entry at the head of the list. - JITCodeEntry->prev_entry = NULL; - jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry; - JITCodeEntry->next_entry = NextEntry; - if (NextEntry != NULL) { - NextEntry->prev_entry = JITCodeEntry; - } - __jit_debug_descriptor.first_entry = JITCodeEntry; - __jit_debug_descriptor.relevant_entry = JITCodeEntry; - __jit_debug_register_code(); - } -} - -void JITDebugRegisterer::UnregisterFunctionInternal( - RegisteredFunctionsMap::iterator I) { - jit_code_entry *&JITCodeEntry = I->second.second; - - // Acquire the lock and do the unregistration. - { - MutexGuard locked(JITDebugLock); - __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN; - - // Remove the jit_code_entry from the linked list. - jit_code_entry *PrevEntry = JITCodeEntry->prev_entry; - jit_code_entry *NextEntry = JITCodeEntry->next_entry; - if (NextEntry) { - NextEntry->prev_entry = PrevEntry; - } - if (PrevEntry) { - PrevEntry->next_entry = NextEntry; - } else { - assert(__jit_debug_descriptor.first_entry == JITCodeEntry); - __jit_debug_descriptor.first_entry = NextEntry; - } - - // Tell GDB which entry we removed, and unregister the code. - __jit_debug_descriptor.relevant_entry = JITCodeEntry; - __jit_debug_register_code(); - } - - delete JITCodeEntry; - JITCodeEntry = NULL; - - // Free the ELF file in memory. - std::string &Buffer = I->second.first; - Buffer.clear(); -} - -void JITDebugRegisterer::UnregisterFunction(const Function *F) { - // TODO: Support non-ELF platforms. - if (!TM.getELFWriterInfo()) - return; - - RegisteredFunctionsMap::iterator I = FnMap.find(F); - if (I == FnMap.end()) return; - UnregisterFunctionInternal(I); - FnMap.erase(I); -} - -} // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h deleted file mode 100644 index dce506bbfefd..000000000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h +++ /dev/null @@ -1,116 +0,0 @@ -//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a JITDebugRegisterer object that is used by the JIT to -// register debug info with debuggers like GDB. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H -#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/DataTypes.h" -#include - -// This must be kept in sync with gdb/gdb/jit.h . 
-extern "C" { - - typedef enum { - JIT_NOACTION = 0, - JIT_REGISTER_FN, - JIT_UNREGISTER_FN - } jit_actions_t; - - struct jit_code_entry { - struct jit_code_entry *next_entry; - struct jit_code_entry *prev_entry; - const char *symfile_addr; - uint64_t symfile_size; - }; - - struct jit_descriptor { - uint32_t version; - // This should be jit_actions_t, but we want to be specific about the - // bit-width. - uint32_t action_flag; - struct jit_code_entry *relevant_entry; - struct jit_code_entry *first_entry; - }; - -} - -namespace llvm { - -class ELFSection; -class Function; -class TargetMachine; - - -/// This class encapsulates information we want to send to the debugger. -/// -struct DebugInfo { - uint8_t *FnStart; - uint8_t *FnEnd; - uint8_t *EhStart; - uint8_t *EhEnd; - - DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {} -}; - -typedef DenseMap< const Function*, std::pair > - RegisteredFunctionsMap; - -/// This class registers debug info for JITed code with an attached debugger. -/// Without proper debug info, GDB can't do things like source level debugging -/// or even produce a proper stack trace on linux-x86_64. To use this class, -/// whenever a function is JITed, create a DebugInfo struct and pass it to the -/// RegisterFunction method. The method will then do whatever is necessary to -/// inform the debugger about the JITed function. -class JITDebugRegisterer { - - TargetMachine &TM; - - /// FnMap - A map of functions that have been registered to the associated - /// temporary files. Used for cleanup. - RegisteredFunctionsMap FnMap; - - /// MakeELF - Builds the ELF file in memory and returns a std::string that - /// contains the ELF. - std::string MakeELF(const Function *F, DebugInfo &I); - -public: - JITDebugRegisterer(TargetMachine &tm); - - /// ~JITDebugRegisterer - Unregisters all code and frees symbol files. - /// - ~JITDebugRegisterer(); - - /// RegisterFunction - Register debug info for the given function with an - /// attached debugger. Clients must call UnregisterFunction on all - /// registered functions before deleting them to free the associated symbol - /// file and unregister it from the debugger. - void RegisterFunction(const Function *F, DebugInfo &I); - - /// UnregisterFunction - Unregister the debug info for the given function - /// from the debugger and free associated memory. - void UnregisterFunction(const Function *F); - -private: - /// UnregisterFunctionInternal - Unregister the debug info for the given - /// function from the debugger and delete any temporary files. The private - /// version of this method does not remove the function from FnMap so that it - /// can be called while iterating over FnMap. 
- void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I); - -}; - -} // end namespace llvm - -#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 8f84ac7b4126..42a136e72d45 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -313,7 +313,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - MayThrow |= MI->getDesc().isCall(); + MayThrow |= MI->isCall(); continue; } diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp index 24020ee6d689..504c8bdffd1b 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "jit" #include "JIT.h" -#include "JITDebugRegisterer.h" #include "JITDwarfEmitter.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Constants.h" @@ -77,8 +76,8 @@ namespace { struct NoRAUWValueMapConfig : public ValueMapConfig { typedef JITResolverState *ExtraData; static void onRAUW(JITResolverState *, Value *Old, Value *New) { - assert(false && "The JIT doesn't know how to handle a" - " RAUW on a value it has emitted."); + llvm_unreachable("The JIT doesn't know how to handle a" + " RAUW on a value it has emitted."); } }; @@ -324,9 +323,6 @@ namespace { /// DE - The dwarf emitter for the jit. OwningPtr DE; - /// DR - The debug registerer for the jit. - OwningPtr DR; - /// LabelLocations - This vector is a mapping from Label ID's to their /// address. DenseMap LabelLocations; @@ -362,22 +358,22 @@ namespace { /// Instance of the JIT JIT *TheJIT; + bool JITExceptionHandling; + public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), TheJIT(&jit) { + EmittedFunctions(this), TheJIT(&jit), + JITExceptionHandling(TM.Options.JITExceptionHandling) { MemMgr = JMM ? 
JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); DEBUG(dbgs() << "JIT is managing a GOT\n"); } - if (JITExceptionHandling || JITEmitDebugInfo) { + if (JITExceptionHandling) { DE.reset(new JITDwarfEmitter(jit)); } - if (JITEmitDebugInfo) { - DR.reset(new JITDebugRegisterer(TM)); - } } ~JITEmitter() { delete MemMgr; @@ -968,7 +964,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } }); - if (JITExceptionHandling || JITEmitDebugInfo) { + if (JITExceptionHandling) { uintptr_t ActualSize = 0; SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; @@ -983,7 +979,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) { EhStart); MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); - uint8_t *EhEnd = CurBufferPtr; BufferBegin = SavedBufferBegin; BufferEnd = SavedBufferEnd; CurBufferPtr = SavedCurBufferPtr; @@ -991,15 +986,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (JITExceptionHandling) { TheJIT->RegisterTable(F.getFunction(), FrameRegister); } - - if (JITEmitDebugInfo) { - DebugInfo I; - I.FnStart = FnStart; - I.FnEnd = FnEnd; - I.EhStart = EhStart; - I.EhEnd = EhEnd; - DR->RegisterFunction(F.getFunction(), I); - } } if (MMI) @@ -1037,17 +1023,13 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { EmittedFunctions.erase(Emitted); } - if(JITExceptionHandling) { + if (JITExceptionHandling) { TheJIT->DeregisterTable(F); } - - if (JITEmitDebugInfo) { - DR->UnregisterFunction(F); - } } -void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { +void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { if (BufferBegin) return JITCodeEmitter::allocateSpace(Size, Alignment); @@ -1059,7 +1041,7 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { return CurBufferPtr; } -void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { +void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { // Delegate this call through the memory manager. return MemMgr->allocateGlobal(Size, Alignment); } @@ -1179,6 +1161,9 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { } break; } + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + llvm_unreachable( + "JT Info emission not implemented for GPRel64BlockAddress yet."); } } diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index eec23cec0af9..2d1775c05c10 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -23,10 +23,22 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Config/config.h" #include #include #include #include + +#if defined(__linux__) +#if defined(HAVE_SYS_STAT_H) +#include +#endif +#include +#include +#endif + using namespace llvm; STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT"); @@ -314,6 +326,11 @@ namespace { /// should allocate a separate slab. static const size_t DefaultSizeThreshold; + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the dlsym function call. + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); + void AllocateGOT(); // Testing methods. 
@@ -441,6 +458,50 @@ namespace { return (uint8_t*)DataAllocator.Allocate(Size, Alignment); } + /// allocateCodeSection - Allocate memory for a code section. + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) { + // FIXME: Alignement handling. + FreeRangeHeader* candidateBlock = FreeMemoryList; + FreeRangeHeader* head = FreeMemoryList; + FreeRangeHeader* iter = head->Next; + + uintptr_t largest = candidateBlock->BlockSize; + + // Search for the largest free block. + while (iter != head) { + if (iter->BlockSize > largest) { + largest = iter->BlockSize; + candidateBlock = iter; + } + iter = iter->Next; + } + + largest = largest - sizeof(MemoryRangeHeader); + + // If this block isn't big enough for the allocation desired, allocate + // another block of memory and add it to the free list. + if (largest < Size || largest <= FreeRangeHeader::getMinBlockSize()) { + DEBUG(dbgs() << "JIT: Allocating another slab of memory for function."); + candidateBlock = allocateNewCodeSlab((size_t)Size); + } + + // Select this candidate block for allocation + CurBlock = candidateBlock; + + // Allocate the entire memory block. + FreeMemoryList = candidateBlock->AllocateBlock(); + // Release the memory at the end of this block that isn't needed. + FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size); + return (uint8_t *)(CurBlock + 1); + } + + /// allocateDataSection - Allocate memory for a data section. + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) { + return (uint8_t*)DataAllocator.Allocate(Size, Alignment); + } + /// startExceptionTable - Use startFunctionBody to allocate memory for the /// function's exception table. uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { @@ -713,6 +774,139 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) { return true; } +//===----------------------------------------------------------------------===// +// getPointerToNamedFunction() implementation. +//===----------------------------------------------------------------------===// + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +// +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. 
+ */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + +//===----------------------------------------------------------------------===// +// +/// getPointerToNamedFunction - This method returns the address of the specified +/// function by using the dynamic loader interface. As such it is only useful +/// for resolving library symbols, not code generated symbols. +/// +void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. + if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinal. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. 
+ std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} + + + JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() { return new DefaultJITMemoryManager(); } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp deleted file mode 100644 index f83f4282e016..000000000000 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/Intercept.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===-- Intercept.cpp - System function interception routines -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// If a function call occurs to an external function, the JIT is designed to use -// the dynamic loader interface to find a function to call. This is useful for -// calling system calls and library functions that are not available in LLVM. -// Some system calls, however, need to be handled specially. For this reason, -// we intercept some of them here and use our own stubs to handle them. -// -//===----------------------------------------------------------------------===// - -#include "MCJIT.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" -using namespace llvm; - -// AtExitHandlers - List of functions to call when the program exits, -// registered with the atexit() library function. -static std::vector AtExitHandlers; - -/// runAtExitHandlers - Run any functions registered by the program's -/// calls to atexit(3), which we intercept and store in -/// AtExitHandlers. -/// -static void runAtExitHandlers() { - while (!AtExitHandlers.empty()) { - void (*Fn)() = AtExitHandlers.back(); - AtExitHandlers.pop_back(); - Fn(); - } -} - -//===----------------------------------------------------------------------===// -// Function stubs that are invoked instead of certain library calls -//===----------------------------------------------------------------------===// - -// Force the following functions to be linked in to anything that uses the -// JIT. This is a hack designed to work around the all-too-clever Glibc -// strategy of making these functions work differently when inlined vs. when -// not inlined, and hiding their real definitions in a separate archive file -// that the dynamic linker can't see. For more info, search for -// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. -#if defined(__linux__) -#if defined(HAVE_SYS_STAT_H) -#include -#endif -#include -#include -/* stat functions are redirecting to __xstat with a version number. On x86-64 - * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' - * available as an exported symbol, so we have to add it explicitly. 
- */ -namespace { -class StatSymbols { -public: - StatSymbols() { - sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); - sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); - sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); - sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); - sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); - sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); - sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); - sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); - sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); - } -}; -} -static StatSymbols initStatSymbols; -#endif // __linux__ - -// jit_exit - Used to intercept the "exit" library call. -static void jit_exit(int Status) { - runAtExitHandlers(); // Run atexit handlers... - exit(Status); -} - -// jit_atexit - Used to intercept the "atexit" library call. -static int jit_atexit(void (*Fn)()) { - AtExitHandlers.push_back(Fn); // Take note of atexit handler... - return 0; // Always successful -} - -static int jit_noop() { - return 0; -} - -//===----------------------------------------------------------------------===// -// -/// getPointerToNamedFunction - This method returns the address of the specified -/// function by using the dynamic loader interface. As such it is only useful -/// for resolving library symbols, not code generated symbols. -/// -void *MCJIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { - if (!isSymbolSearchingDisabled()) { - // Check to see if this is one of the functions we want to intercept. Note, - // we cast to intptr_t here to silence a -pedantic warning that complains - // about casting a function pointer to a normal pointer. - if (Name == "exit") return (void*)(intptr_t)&jit_exit; - if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - // If this is an asm specifier, skip the sentinal. - if (NameStr[0] == 1) ++NameStr; - - // If it's an external function, look it up in the process image... - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // and has an asm specifier, try again without the underscore. - if (Name[0] == 1 && NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These - // are references to hidden visibility symbols that dlsym cannot resolve. - // If we have one of these, strip off $LDBLStub and try again. -#if defined(__APPLE__) && defined(__ppc__) - if (Name.size() > 9 && Name[Name.size()-9] == '$' && - memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { - // First try turning $LDBLStub into $LDBL128. 
If that fails, strip it off. - // This mirrors logic in libSystemStubs.a. - std::string Prefix = std::string(Name.begin(), Name.end()-9); - if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) - return Ptr; - if (void *Ptr = getPointerToNamedFunction(Prefix, false)) - return Ptr; - } -#endif - } - - /// If a LazyFunctionCreator is installed, use it to get/create the function. - if (LazyFunctionCreator) - if (void *RP = LazyFunctionCreator(Name)) - return RP; - - if (AbortOnFailure) { - report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); - } - return 0; -} diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 7c8a740dc862..44f89cf78309 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -36,7 +36,6 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -46,8 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel, - GVsWithCode); + return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode); if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; @@ -55,8 +53,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, } MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel, - bool AllocateGVsWithCode) + RTDyldMemoryManager *MM, bool AllocateGVsWithCode) : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { setTargetData(TM->getTargetData()); @@ -64,7 +61,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) { + if (TM->addPassesToEmitMC(PM, Ctx, OS, false)) { report_fatal_error("Target does not support MC emission!"); } @@ -77,9 +74,9 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, OS.flush(); // Load the object into the dynamic linker. - // FIXME: It would be nice to avoid making yet another copy. - MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(), - Buffer.size())); + MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), + Buffer.size()), + "", false); if (Dyld.loadObject(MB)) report_fatal_error(Dyld.getErrorString()); // Resolve any relocations. 
@@ -88,11 +85,11 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, MCJIT::~MCJIT() { delete MemMgr; + delete TM; } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { report_fatal_error("not yet implemented"); - return 0; } void *MCJIT::getPointerToFunction(Function *F) { @@ -211,12 +208,30 @@ GenericValue MCJIT::runFunction(Function *F, case Type::FP128TyID: case Type::PPC_FP128TyID: llvm_unreachable("long double not supported yet"); - return rv; case Type::PointerTyID: return PTOGV(((void*(*)())(intptr_t)FPtr)()); } } - assert(0 && "Full-featured argument passing not supported yet!"); - return GenericValue(); + llvm_unreachable("Full-featured argument passing not supported yet!"); +} + +void *MCJIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure){ + if (!isSymbolSearchingDisabled() && MemMgr) { + void *ptr = MemMgr->getPointerToNamedFunction(Name, false); + if (ptr) + return ptr; + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h index b64c21a97360..2b3df9884eb2 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -24,8 +24,7 @@ namespace llvm { class MCJIT : public ExecutionEngine { MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel, - bool AllocateGVsWithCode); + RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode); TargetMachine *TM; MCContext *Ctx; @@ -66,8 +65,17 @@ class MCJIT : public ExecutionEngine { /// found, this function silently returns a null pointer. Otherwise, /// it prints a message to stderr and aborts. /// - void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true); + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); + + /// mapSectionAddress - map a section to its target address space value. + /// Map the address of a JIT section as returned from the memory manager + /// to the address in the target process as the running code will see it. + /// This is the address which will be used for relocation resolution. + virtual void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) { + Dyld.mapSectionAddress(LocalAddress, TargetAddress); + } + /// @} /// @name (Private) Registration Interfaces /// @{ @@ -79,7 +87,6 @@ class MCJIT : public ExecutionEngine { static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM); diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp new file mode 100644 index 000000000000..457fe5e3ef06 --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp @@ -0,0 +1,14 @@ +//==-- MCJITMemoryManager.cpp - Definition for the Memory Manager -*-C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCJITMemoryManager.h" + +using namespace llvm; + +void MCJITMemoryManager::anchor() { } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h index 40bc031a0771..a68949aa41c8 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -21,45 +21,30 @@ namespace llvm { // and the RuntimeDyld interface that maps objects, by name, onto their // matching LLVM IR counterparts in the module(s) being compiled. class MCJITMemoryManager : public RTDyldMemoryManager { + virtual void anchor(); JITMemoryManager *JMM; // FIXME: Multiple modules. Module *M; public: - MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : JMM(jmm), M(m) {} + MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : + JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()), M(m) {} + // We own the JMM, so make sure to delete it. + ~MCJITMemoryManager() { delete JMM; } - // Allocate ActualSize bytes, or more, for the named function. Return - // a pointer to the allocated memory and update Size to reflect how much - // memory was acutally allocated. - uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) { - // FIXME: This should really reference the MCAsmInfo to get the global - // prefix. - if (Name[0] == '_') ++Name; - Function *F = M->getFunction(Name); - // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-"), try - // prepending a \01 and see if we can find it that way. - if (!F && Name[0] == '-') - F = M->getFunction((Twine("\1") + Name).str()); - assert(F && "No matching function in JIT IR Module!"); - return JMM->startFunctionBody(F, Size); + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) { + return JMM->allocateSpace(Size, Alignment); } - // Mark the end of the function, including how much of the allocated - // memory was actually used. - void endFunctionBody(const char *Name, uint8_t *FunctionStart, - uint8_t *FunctionEnd) { - // FIXME: This should really reference the MCAsmInfo to get the global - // prefix. - if (Name[0] == '_') ++Name; - Function *F = M->getFunction(Name); - // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-"), try - // prepending a \01 and see if we can find it that way. 
- if (!F && Name[0] == '-') - F = M->getFunction((Twine("\1") + Name).str()); - assert(F && "No matching function in JIT IR Module!"); - JMM->endFunctionBody(F, FunctionStart, FunctionEnd); + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID) { + return JMM->allocateSpace(Size, Alignment); + } + + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) { + return JMM->getPointerToNamedFunction(Name, AbortOnFailure); } }; diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp similarity index 70% rename from contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp rename to contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 9a9ed6d33484..e6142e3678da 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -7,51 +7,55 @@ // //===----------------------------------------------------------------------===// // -// This file defines a JITEventListener object that calls into OProfile to tell -// it about JITted functions. For now, we only record function names and sizes, -// but eventually we'll also record line number information. -// -// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the -// definition of the interface we're using. +// This file defines a JITEventListener object that uses OProfileWrapper to tell +// oprofile about JITted functions, including source line information. // //===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/JITEventListener.h" + #define DEBUG_TYPE "oprofile-jit-event-listener" #include "llvm/Function.h" -#include "llvm/Metadata.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" -#include "llvm/Config/config.h" -#include +#include "EventListenerCommon.h" + +#include +#include + using namespace llvm; - -#if USE_OPROFILE - -#include +using namespace llvm::jitprofiling; namespace { class OProfileJITEventListener : public JITEventListener { - op_agent_t Agent; + OProfileWrapper& Wrapper; + + void initialize(); + public: - OProfileJITEventListener(); + OProfileJITEventListener(OProfileWrapper& LibraryWrapper) + : Wrapper(LibraryWrapper) { + initialize(); + } + ~OProfileJITEventListener(); virtual void NotifyFunctionEmitted(const Function &F, - void *FnStart, size_t FnSize, - const EmittedFunctionDetails &Details); + void *FnStart, size_t FnSize, + const JITEvent_EmittedFunctionDetails &Details); + virtual void NotifyFreeingMachineCode(void *OldPtr); }; -OProfileJITEventListener::OProfileJITEventListener() - : Agent(op_open_agent()) { - if (Agent == NULL) { +void OProfileJITEventListener::initialize() { + if (!Wrapper.op_open_agent()) { const std::string err_str = sys::StrError(); DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n"); } else { @@ -60,8 +64,8 @@ OProfileJITEventListener::OProfileJITEventListener() } OProfileJITEventListener::~OProfileJITEventListener() { - if (Agent != NULL) 
{ - if (op_close_agent(Agent) == -1) { + if (Wrapper.isAgentAvailable()) { + if (Wrapper.op_close_agent() == -1) { const std::string err_str = sys::StrError(); DEBUG(dbgs() << "Failed to disconnect from OProfile agent: " << err_str << "\n"); @@ -71,22 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() { } } -class FilenameCache { - // Holds the filename of each Scope, so that we can pass a null-terminated - // string into oprofile. Use an AssertingVH rather than a ValueMap because we - // shouldn't be modifying any MDNodes while this map is alive. - DenseMap, std::string> Filenames; - - public: - const char *getFilename(MDNode *Scope) { - std::string &Filename = Filenames[Scope]; - if (Filename.empty()) { - Filename = DIScope(Scope).getFilename(); - } - return Filename.c_str(); - } -}; - static debug_line_info LineStartToOProfileFormat( const MachineFunction &MF, FilenameCache &Filenames, uintptr_t Address, DebugLoc Loc) { @@ -103,9 +91,9 @@ static debug_line_info LineStartToOProfileFormat( // Adds the just-emitted function to the symbol table. void OProfileJITEventListener::NotifyFunctionEmitted( const Function &F, void *FnStart, size_t FnSize, - const EmittedFunctionDetails &Details) { + const JITEvent_EmittedFunctionDetails &Details) { assert(F.hasName() && FnStart != 0 && "Bad symbol to add"); - if (op_write_native_code(Agent, F.getName().data(), + if (Wrapper.op_write_native_code(F.getName().data(), reinterpret_cast(FnStart), FnStart, FnSize) == -1) { DEBUG(dbgs() << "Failed to tell OProfile about native function " @@ -151,8 +139,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted( // line info's address to include the start of the function. LineInfo[0].vma = reinterpret_cast(FnStart); - if (op_write_debug_line_info(Agent, FnStart, - LineInfo.size(), &*LineInfo.begin()) == -1) { + if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(), + &*LineInfo.begin()) == -1) { DEBUG(dbgs() << "Failed to tell OProfile about line numbers for native function " << F.getName() << " at [" @@ -164,7 +152,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( // Removes the being-deleted function from the symbol table. void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { assert(FnStart && "Invalid function pointer"); - if (op_unload_native_code(Agent, reinterpret_cast(FnStart)) == -1) { + if (Wrapper.op_unload_native_code(reinterpret_cast(FnStart)) == -1) { DEBUG(dbgs() << "Failed to tell OProfile about unload of native function at " << FnStart << "\n"); @@ -174,19 +162,16 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { } // anonymous namespace. namespace llvm { -JITEventListener *createOProfileJITEventListener() { - return new OProfileJITEventListener; -} +JITEventListener *JITEventListener::createOProfileJITEventListener() { + static OwningPtr JITProfilingWrapper(new OProfileWrapper); + return new OProfileJITEventListener(*JITProfilingWrapper); } -#else // USE_OPROFILE - -namespace llvm { -// By defining this to return NULL, we can let clients call it unconditionally, -// even if they haven't configured with the OProfile libraries. 
-JITEventListener *createOProfileJITEventListener() { - return NULL; +// for testing +JITEventListener *JITEventListener::createOProfileJITEventListener( + OProfileWrapper* TestImpl) { + return new OProfileJITEventListener(*TestImpl); } -} // namespace llvm -#endif // USE_OPROFILE +} // namespace llvm + diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp new file mode 100644 index 000000000000..d67f5370b862 --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -0,0 +1,263 @@ +//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface in OProfileWrapper.h. It is responsible +// for loading the opagent dynamic library when the first call to an op_ +// function occurs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/OProfileWrapper.h" + +#define DEBUG_TYPE "oprofile-wrapper" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/ADT/SmallString.h" + +#include +#include +#include +#include +#include +#include + +namespace { + +// Global mutex to ensure a single thread initializes oprofile agent. +llvm::sys::Mutex OProfileInitializationMutex; + +} // anonymous namespace + +namespace llvm { + +OProfileWrapper::OProfileWrapper() +: Agent(0), + OpenAgentFunc(0), + CloseAgentFunc(0), + WriteNativeCodeFunc(0), + WriteDebugLineInfoFunc(0), + UnloadNativeCodeFunc(0), + MajorVersionFunc(0), + MinorVersionFunc(0), + IsOProfileRunningFunc(0), + Initialized(false) { +} + +bool OProfileWrapper::initialize() { + using namespace llvm; + using namespace llvm::sys; + + MutexGuard Guard(OProfileInitializationMutex); + + if (Initialized) + return OpenAgentFunc != 0; + + Initialized = true; + + // If the oprofile daemon is not running, don't load the opagent library + if (!isOProfileRunning()) { + DEBUG(dbgs() << "OProfile daemon is not detected.\n"); + return false; + } + + std::string error; + if(!DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) { + DEBUG(dbgs() + << "OProfile connector library libopagent.so could not be loaded: " + << error << "\n"); + } + + // Get the addresses of the opagent functions + OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_open_agent"); + CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_close_agent"); + WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code"); + WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info"); + UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code"); + MajorVersionFunc = (op_major_version_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_major_version"); + MinorVersionFunc = (op_major_version_ptr_t)(intptr_t) + 
DynamicLibrary::SearchForAddressOfSymbol("op_minor_version"); + + // With missing functions, we can do nothing + if (!OpenAgentFunc + || !CloseAgentFunc + || !WriteNativeCodeFunc + || !WriteDebugLineInfoFunc + || !UnloadNativeCodeFunc) { + OpenAgentFunc = 0; + CloseAgentFunc = 0; + WriteNativeCodeFunc = 0; + WriteDebugLineInfoFunc = 0; + UnloadNativeCodeFunc = 0; + return false; + } + + return true; +} + +bool OProfileWrapper::isOProfileRunning() { + if (IsOProfileRunningFunc != 0) + return IsOProfileRunningFunc(); + return checkForOProfileProcEntry(); +} + +bool OProfileWrapper::checkForOProfileProcEntry() { + DIR* ProcDir; + + ProcDir = opendir("/proc"); + if (!ProcDir) + return false; + + // Walk the /proc tree looking for the oprofile daemon + struct dirent* Entry; + while (0 != (Entry = readdir(ProcDir))) { + if (Entry->d_type == DT_DIR) { + // Build a path from the current entry name + SmallString<256> CmdLineFName; + raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name + << "/cmdline"; + + // Open the cmdline file + int CmdLineFD = open(CmdLineFName.c_str(), S_IRUSR); + if (CmdLineFD != -1) { + char ExeName[PATH_MAX+1]; + char* BaseName = 0; + + // Read the cmdline file + ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1); + close(CmdLineFD); + ssize_t Idx = 0; + + // Find the terminator for the first string + while (Idx < NumRead-1 && ExeName[Idx] != 0) { + Idx++; + } + + // Go back to the last non-null character + Idx--; + + // Find the last path separator in the first string + while (Idx > 0) { + if (ExeName[Idx] == '/') { + BaseName = ExeName + Idx + 1; + break; + } + Idx--; + } + + // Test this to see if it is the oprofile daemon + if (BaseName != 0 && !strcmp("oprofiled", BaseName)) { + // If it is, we're done + closedir(ProcDir); + return true; + } + } + } + } + + // We've looked through all the files and didn't find the daemon + closedir(ProcDir); + return false; +} + +bool OProfileWrapper::op_open_agent() { + if (!Initialized) + initialize(); + + if (OpenAgentFunc != 0) { + Agent = OpenAgentFunc(); + return Agent != 0; + } + + return false; +} + +int OProfileWrapper::op_close_agent() { + if (!Initialized) + initialize(); + + int ret = -1; + if (Agent && CloseAgentFunc) { + ret = CloseAgentFunc(Agent); + if (ret == 0) { + Agent = 0; + } + } + return ret; +} + +bool OProfileWrapper::isAgentAvailable() { + return Agent != 0; +} + +int OProfileWrapper::op_write_native_code(const char* Name, + uint64_t Addr, + void const* Code, + const unsigned int Size) { + if (!Initialized) + initialize(); + + if (Agent && WriteNativeCodeFunc) + return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size); + + return -1; +} + +int OProfileWrapper::op_write_debug_line_info( + void const* Code, + size_t NumEntries, + struct debug_line_info const* Info) { + if (!Initialized) + initialize(); + + if (Agent && WriteDebugLineInfoFunc) + return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info); + + return -1; +} + +int OProfileWrapper::op_major_version() { + if (!Initialized) + initialize(); + + if (Agent && MajorVersionFunc) + return MajorVersionFunc(); + + return -1; +} + +int OProfileWrapper::op_minor_version() { + if (!Initialized) + initialize(); + + if (Agent && MinorVersionFunc) + return MinorVersionFunc(); + + return -1; +} + +int OProfileWrapper::op_unload_native_code(uint64_t Addr) { + if (!Initialized) + initialize(); + + if (Agent && UnloadNativeCodeFunc) + return UnloadNativeCodeFunc(Agent, Addr); + + return -1; +} + +} // namespace llvm diff --git 
a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 33dd70502798..63cec1aca3b1 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -1,4 +1,4 @@ -//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// +//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,6 +13,10 @@ #define DEBUG_TYPE "dyld" #include "RuntimeDyldImpl.h" +#include "RuntimeDyldELF.h" +#include "RuntimeDyldMachO.h" +#include "llvm/Support/Path.h" + using namespace llvm; using namespace llvm::object; @@ -22,35 +26,383 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { -void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress, - uint8_t *EndAddress) { - // Allocate memory for the function via the memory manager. - uintptr_t Size = EndAddress - StartAddress + 1; - uintptr_t AllocSize = Size; - uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize); - assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) && - "Memory manager failed to allocate enough memory!"); - // Copy the function payload into the memory block. - memcpy(Mem, StartAddress, Size); - MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size); - // Remember where we put it. - Functions[Name] = sys::MemoryBlock(Mem, Size); - // Default the assigned address for this symbol to wherever this - // allocated it. - SymbolTable[Name] = Mem; - DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n"); -} +namespace { + // Helper for extensive error checking in debug builds. + error_code Check(error_code Err) { + if (Err) { + report_fatal_error(Err.message()); + } + return Err; + } +} // end anonymous namespace // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { - // Just iterate over the symbols in our symbol table and assign their - // addresses. - StringMap<uint8_t*>::iterator i = SymbolTable.begin(); - StringMap<uint8_t*>::iterator e = SymbolTable.end(); - for (;i != e; ++i) - reassignSymbolAddress(i->getKey(), i->getValue()); + // First, resolve relocations associated with external symbols. + resolveSymbols(); + + // Just iterate over the sections we have and resolve all the relocations + // in them. Gross overkill, but it gets the job done. + for (int i = 0, e = Sections.size(); i != e; ++i) { + reassignSectionAddress(i, Sections[i].LoadAddress); + } } +void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress, + uint64_t TargetAddress) { + for (unsigned i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Address == LocalAddress) { + reassignSectionAddress(i, TargetAddress); + return; + } + } + llvm_unreachable("Attempting to remap address of unknown section!"); +} + +bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { + // FIXME: ObjectFile don't modify MemoryBuffer. + // It should use const MemoryBuffer as parameter. + OwningPtr<ObjectFile> obj(ObjectFile::createObjectFile( + const_cast<MemoryBuffer*>(InputBuffer))); + if (!obj) + report_fatal_error("Unable to create object image from memory buffer!"); + + Arch = (Triple::ArchType)obj->getArch(); + + LocalSymbolMap LocalSymbols; // Functions and data symbols from the + // object file.
+ ObjSectionToIDMap LocalSections; // Used sections from the object file + CommonSymbolMap CommonSymbols; // Common symbols requiring allocation + uint64_t CommonSize = 0; + + error_code err; + // Parse symbols + DEBUG(dbgs() << "Parse symbols:\n"); + for (symbol_iterator i = obj->begin_symbols(), e = obj->end_symbols(); + i != e; i.increment(err)) { + Check(err); + object::SymbolRef::Type SymType; + StringRef Name; + Check(i->getType(SymType)); + Check(i->getName(Name)); + + uint32_t flags; + Check(i->getFlags(flags)); + + bool isCommon = flags & SymbolRef::SF_Common; + if (isCommon) { + // Add the common symbols to a list. We'll allocate them all below. + uint64_t Size = 0; + Check(i->getSize(Size)); + CommonSize += Size; + CommonSymbols[*i] = Size; + } else { + if (SymType == object::SymbolRef::ST_Function || + SymType == object::SymbolRef::ST_Data) { + uint64_t FileOffset; + StringRef sData; + section_iterator si = obj->end_sections(); + Check(i->getFileOffset(FileOffset)); + Check(i->getSection(si)); + if (si == obj->end_sections()) continue; + Check(si->getContents(sData)); + const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() + + (uintptr_t)FileOffset; + uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin()); + unsigned SectionID = + findOrEmitSection(*si, + SymType == object::SymbolRef::ST_Function, + LocalSections); + bool isGlobal = flags & SymbolRef::SF_Global; + LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); + DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) + << " flags: " << flags + << " SID: " << SectionID + << " Offset: " << format("%p", SectOffset)); + if (isGlobal) + SymbolTable[Name] = SymbolLoc(SectionID, SectOffset); + } + } + DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n"); + } + + // Allocate common symbols + if (CommonSize != 0) + emitCommonSymbols(CommonSymbols, CommonSize, LocalSymbols); + + // Parse and proccess relocations + DEBUG(dbgs() << "Parse relocations:\n"); + for (section_iterator si = obj->begin_sections(), + se = obj->end_sections(); si != se; si.increment(err)) { + Check(err); + bool isFirstRelocation = true; + unsigned SectionID = 0; + StubMap Stubs; + + for (relocation_iterator i = si->begin_relocations(), + e = si->end_relocations(); i != e; i.increment(err)) { + Check(err); + + // If it's first relocation in this section, find its SectionID + if (isFirstRelocation) { + SectionID = findOrEmitSection(*si, true, LocalSections); + DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n"); + isFirstRelocation = false; + } + + ObjRelocationInfo RI; + RI.SectionID = SectionID; + Check(i->getAdditionalInfo(RI.AdditionalInfo)); + Check(i->getOffset(RI.Offset)); + Check(i->getSymbol(RI.Symbol)); + Check(i->getType(RI.Type)); + + DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo + << " Offset: " << format("%p", (uintptr_t)RI.Offset) + << " Type: " << (uint32_t)(RI.Type & 0xffffffffL) + << "\n"); + processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs); + } + } + return false; +} + +unsigned RuntimeDyldImpl::emitCommonSymbols(const CommonSymbolMap &Map, + uint64_t TotalSize, + LocalSymbolMap &LocalSymbols) { + // Allocate memory for the section + unsigned SectionID = Sections.size(); + uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*), + SectionID); + if (!Addr) + report_fatal_error("Unable to allocate memory for common symbols!"); + uint64_t Offset = 0; + Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0)); + memset(Addr, 
0, TotalSize); + + DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID + << " new addr: " << format("%p", Addr) + << " DataSize: " << TotalSize + << "\n"); + + // Assign the address of each symbol + for (CommonSymbolMap::const_iterator it = Map.begin(), itEnd = Map.end(); + it != itEnd; it++) { + uint64_t Size = it->second; + StringRef Name; + it->first.getName(Name); + LocalSymbols[Name.data()] = SymbolLoc(SectionID, Offset); + Offset += Size; + Addr += Size; + } + + return SectionID; +} + +unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section, + bool IsCode) { + + unsigned StubBufSize = 0, + StubSize = getMaxStubSize(); + error_code err; + if (StubSize > 0) { + for (relocation_iterator i = Section.begin_relocations(), + e = Section.end_relocations(); i != e; i.increment(err), Check(err)) + StubBufSize += StubSize; + } + StringRef data; + uint64_t Alignment64; + Check(Section.getContents(data)); + Check(Section.getAlignment(Alignment64)); + + unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; + bool IsRequired; + bool IsVirtual; + bool IsZeroInit; + uint64_t DataSize; + Check(Section.isRequiredForExecution(IsRequired)); + Check(Section.isVirtual(IsVirtual)); + Check(Section.isZeroInit(IsZeroInit)); + Check(Section.getSize(DataSize)); + + unsigned Allocate; + unsigned SectionID = Sections.size(); + uint8_t *Addr; + const char *pData = 0; + + // Some sections, such as debug info, don't need to be loaded for execution. + // Leave those where they are. + if (IsRequired) { + Allocate = DataSize + StubBufSize; + Addr = IsCode + ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID) + : MemMgr->allocateDataSection(Allocate, Alignment, SectionID); + if (!Addr) + report_fatal_error("Unable to allocate section memory!"); + + // Virtual sections have no data in the object image, so leave pData = 0 + if (!IsVirtual) + pData = data.data(); + + // Zero-initialize or copy the data from the image + if (IsZeroInit || IsVirtual) + memset(Addr, 0, DataSize); + else + memcpy(Addr, pData, DataSize); + + DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " obj addr: " << format("%p", pData) + << " new addr: " << format("%p", Addr) + << " DataSize: " << DataSize + << " StubBufSize: " << StubBufSize + << " Allocate: " << Allocate + << "\n"); + } + else { + // Even if we didn't load the section, we need to record an entry for it + // to handle later processing (and by 'handle' I mean don't do anything + // with these sections). 
+ Allocate = 0; + Addr = 0; + DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " obj addr: " << format("%p", data.data()) + << " new addr: 0" + << " DataSize: " << DataSize + << " StubBufSize: " << StubBufSize + << " Allocate: " << Allocate + << "\n"); + } + + Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData)); + return SectionID; +} + +unsigned RuntimeDyldImpl::findOrEmitSection(const SectionRef &Section, + bool IsCode, + ObjSectionToIDMap &LocalSections) { + + unsigned SectionID = 0; + ObjSectionToIDMap::iterator i = LocalSections.find(Section); + if (i != LocalSections.end()) + SectionID = i->second; + else { + SectionID = emitSection(Section, IsCode); + LocalSections[Section] = SectionID; + } + return SectionID; +} + +void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value, + unsigned SectionID, uintptr_t Offset, + uint32_t RelType) { + DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName) + << " SID: " << Value.SectionID + << " Addend: " << format("%p", Value.Addend) + << " Offset: " << format("%p", Offset) + << " RelType: " << format("%x", RelType) + << "\n"); + + if (Value.SymbolName == 0) { + Relocations[Value.SectionID].push_back(RelocationEntry( + SectionID, + Offset, + RelType, + Value.Addend)); + } else + SymbolRelocations[Value.SymbolName].push_back(RelocationEntry( + SectionID, + Offset, + RelType, + Value.Addend)); +} + +uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { + // TODO: There is only ARM far stub now. We should add the Thumb stub, + // and stubs for branches Thumb - ARM and ARM - Thumb. + if (Arch == Triple::arm) { + uint32_t *StubAddr = (uint32_t*)Addr; + *StubAddr = 0xe51ff004; // ldr pc,