Vendor import of clang trunk r178860:

http://llvm.org/svn/llvm-project/cfe/trunk@178860
This commit is contained in:
Dimitry Andric 2013-04-08 18:45:10 +00:00
parent be7c9ec198
commit 809500fc2c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/clang/dist/; revision=249261
svn path=/vendor/clang/clang-trunk-r178860/; revision=249262; tag=vendor/clang/clang-trunk-r178860
2316 changed files with 149033 additions and 62833 deletions

4
.arcconfig Normal file
View File

@ -0,0 +1,4 @@
{
"project_id" : "clang",
"conduit_uri" : "http://llvm-reviews.chandlerc.com/"
}

3
.gitignore vendored
View File

@ -30,3 +30,6 @@ cscope.out
#==============================================================================#
# Clang extra user tools, which is tracked independently (clang-tools-extra).
tools/extra
# Sphinx build products
docs/_build
docs/analyzer/_build

View File

@ -66,6 +66,11 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR )
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib )
set( CLANG_BUILT_STANDALONE 1 )
find_package(LibXml2)
if (LIBXML2_FOUND)
set(CLANG_HAVE_LIBXML 1)
endif ()
endif()
set(CLANG_RESOURCE_DIR "" CACHE STRING
@ -133,16 +138,17 @@ configure_file(
# Add appropriate flags for GCC
if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual -Wcast-qual -fno-strict-aliasing -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings")
check_cxx_compiler_flag("-Werror -Wnested-anon-types" CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG)
if( CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG )
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-nested-anon-types" )
endif()
endif ()
if (APPLE)
set(CMAKE_MODULE_LINKER_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
endif ()
# libxml2 is an optional dependency, required only to run validation
# tests on XML output.
find_package(LibXml2)
configure_file(
${CLANG_SOURCE_DIR}/include/clang/Config/config.h.cmake
${CLANG_BINARY_DIR}/include/clang/Config/config.h)
@ -253,6 +259,9 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
add_definitions( -D_GNU_SOURCE )
# FIXME: They should be options.
add_definitions(-DCLANG_ENABLE_ARCMT -DCLANG_ENABLE_REWRITER -DCLANG_ENABLE_STATIC_ANALYZER)
# Clang version information
set(CLANG_EXECUTABLE_VERSION
"${CLANG_VERSION_MAJOR}.${CLANG_VERSION_MINOR}" CACHE STRING
@ -272,13 +281,15 @@ add_subdirectory(runtime)
option(CLANG_BUILD_EXAMPLES "Build CLANG example programs by default." OFF)
add_subdirectory(examples)
option(CLANG_INCLUDE_TESTS
"Generate build targets for the Clang unit tests."
${LLVM_INCLUDE_TESTS})
# TODO: docs.
add_subdirectory(test)
if( LLVM_INCLUDE_TESTS )
if( NOT CLANG_BUILT_STANDALONE )
add_subdirectory(unittests)
endif()
if( CLANG_INCLUDE_TESTS )
add_subdirectory(unittests)
endif()
# Workaround for MSVS10 to avoid the Dialog Hell

40
CODE_OWNERS.TXT Normal file
View File

@ -0,0 +1,40 @@
This file is a list of the people responsible for ensuring that patches for a
particular part of Clang are reviewed, either by themselves or by someone else.
They are also the gatekeepers for their part of Clang, with the final word on
what goes in or not.
The list is sorted by surname and formatted to allow easy grepping and
beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S).
N: Chandler Carruth
E: chandlerc@gmail.com
E: chandlerc@google.com
D: CMake, library layering
N: Eric Christopher
E: echristo@gmail.com
D: Debug Information, autotools/configure/make build, inline assembly
N: Doug Gregor
D: All parts of Clang not covered by someone else
N: Anton Korobeynikov
E: anton@korobeynikov.info
D: Exception handling, Windows codegen, ARM EABI
N: Ted Kremenek
D: Clang Static Analyzer
N: John McCall
E: rjmccall@apple.com
D: Clang LLVM IR generation
N: Chad Rosier
E: mcrosier@apple.com
D: MS-inline asm, and the compiler driver
N: Richard Smith
E: richard@metafoo.co.uk
D: Clang Semantic Analysis (tools/clang/lib/Sema/* tools/clang/include/clang/Sema/*)

View File

@ -44,6 +44,6 @@ From inside the Clang build directory, run 'make install' to install the Clang
compiler and header files into the prefix directory selected when LLVM was
configured.
The Clang compiler is available as 'clang' and supports a gcc like command line
The Clang compiler is available as 'clang' and 'clang++'. It supports a gcc like command line
interface. See the man page for clang (installed into $prefix/share/man/man1)
for more information.

View File

@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
Copyright (c) 2007-2012 University of Illinois at Urbana-Champaign.
Copyright (c) 2007-2013 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:

View File

@ -2,9 +2,6 @@
// Random Notes
//===---------------------------------------------------------------------===//
C90/C99/C++ Comparisons:
http://david.tribble.com/text/cdiffs.htm
//===---------------------------------------------------------------------===//
To time GCC preprocessing speed without output, use:

View File

@ -1271,6 +1271,17 @@ def translation_unit(self):
# created.
return self._tu
@property
def referenced(self):
"""
For a cursor that is a reference, returns a cursor
representing the entity that it references.
"""
if not hasattr(self, '_referenced'):
self._referenced = conf.lib.clang_getCursorReferenced(self)
return self._referenced
def get_arguments(self):
"""Return an iterator for accessing the arguments of this cursor."""
num_args = conf.lib.clang_Cursor_getNumArguments(self)
@ -1634,6 +1645,33 @@ class _CXUnsavedFile(Structure):
"""Helper for passing unsaved file arguments."""
_fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]
# Function calls through the Python interface are rather slow. Fortunately,
# for most symbols, we do not need to perform a function call. Their spelling
# never changes and is consequently provided by this spelling cache.
#
# Keys are the integer chunk kinds returned by clang_getCompletionChunkKind;
# values are the fixed text of that kind of chunk. Kinds that are commented
# out have no fixed text and must still be queried through
# clang_getCompletionChunkText.
SpellingCache = {
    # 0: CompletionChunk.Kind("Optional"),
    # 1: CompletionChunk.Kind("TypedText"),
    # 2: CompletionChunk.Kind("Text"),
    # 3: CompletionChunk.Kind("Placeholder"),
    # 4: CompletionChunk.Kind("Informative"),
    # 5: CompletionChunk.Kind("CurrentParameter"),
    6: '(',   # CompletionChunk.Kind("LeftParen"),
    7: ')',   # CompletionChunk.Kind("RightParen"),
    8: '[',   # CompletionChunk.Kind("LeftBracket"),
    9: ']',   # CompletionChunk.Kind("RightBracket"),
    10: '{',  # CompletionChunk.Kind("LeftBrace"),
    11: '}',  # CompletionChunk.Kind("RightBrace"),
    12: '<',  # CompletionChunk.Kind("LeftAngle"),
    13: '>',  # CompletionChunk.Kind("RightAngle"),
    14: ', ', # CompletionChunk.Kind("Comma"),
    # 15: CompletionChunk.Kind("ResultType"),
    16: ':',  # CompletionChunk.Kind("Colon"),
    17: ';',  # CompletionChunk.Kind("SemiColon"),
    18: '=',  # CompletionChunk.Kind("Equal"),
    19: ' ',  # CompletionChunk.Kind("HorizontalSpace"),
    # 20: CompletionChunk.Kind("VerticalSpace")
}
class CompletionChunk:
class Kind:
def __init__(self, name):
@ -1648,18 +1686,30 @@ def __repr__(self):
def __init__(self, completionString, key):
self.cs = completionString
self.key = key
self.__kindNumberCache = -1
def __repr__(self):
return "{'" + self.spelling + "', " + str(self.kind) + "}"
@CachedProperty
def spelling(self):
if self.__kindNumber in SpellingCache:
return SpellingCache[self.__kindNumber]
return conf.lib.clang_getCompletionChunkText(self.cs, self.key).spelling
# We do not use @CachedProperty here, as the manual implementation is
# apparently still significantly faster. Please profile carefully if you
# would like to add CachedProperty back.
@property
def __kindNumber(self):
if self.__kindNumberCache == -1:
self.__kindNumberCache = \
conf.lib.clang_getCompletionChunkKind(self.cs, self.key)
return self.__kindNumberCache
@CachedProperty
def kind(self):
res = conf.lib.clang_getCompletionChunkKind(self.cs, self.key)
return completionChunkKindMap[res]
return completionChunkKindMap[self.__kindNumber]
@CachedProperty
def string(self):
@ -1672,19 +1722,19 @@ def string(self):
None
def isKindOptional(self):
return self.kind == completionChunkKindMap[0]
return self.__kindNumber == 0
def isKindTypedText(self):
return self.kind == completionChunkKindMap[1]
return self.__kindNumber == 1
def isKindPlaceHolder(self):
return self.kind == completionChunkKindMap[3]
return self.__kindNumber == 3
def isKindInformative(self):
return self.kind == completionChunkKindMap[4]
return self.__kindNumber == 4
def isKindResultType(self):
return self.kind == completionChunkKindMap[15]
return self.__kindNumber == 15
completionChunkKindMap = {
0: CompletionChunk.Kind("Optional"),
@ -1965,7 +2015,7 @@ def from_source(cls, filename, args=None, unsaved_files=None, options=0,
len(args), unsaved_array,
len(unsaved_files), options)
if ptr is None:
if not ptr:
raise TranslationUnitLoadError("Error parsing translation unit.")
return cls(ptr, index=index)
@ -1987,7 +2037,7 @@ def from_ast_file(cls, filename, index=None):
index = Index.create()
ptr = conf.lib.clang_createTranslationUnit(index, filename)
if ptr is None:
if not ptr:
raise TranslationUnitLoadError(filename)
return cls(ptr=ptr, index=index)
@ -3046,13 +3096,13 @@ def set_library_path(path):
Config.library_path = path
@staticmethod
def set_library_file(file):
"""Set the exact location of libclang from"""
def set_library_file(filename):
"""Set the exact location of libclang"""
if Config.loaded:
raise Exception("library file must be set before before using " \
"any other functionalities in libclang.")
Config.library_file = path
Config.library_file = filename
@staticmethod
def set_compatibility_check(check_status):

View File

@ -250,3 +250,12 @@ def test_get_arguments():
assert len(arguments) == 2
assert arguments[0].spelling == "i"
assert arguments[1].spelling == "j"
def test_referenced():
tu = get_tu('void foo(); void bar() { foo(); }')
foo = get_cursor(tu, 'foo')
bar = get_cursor(tu, 'bar')
for c in bar.get_children():
if c.kind == CursorKind.CALL_EXPR:
assert c.referenced.spelling == foo.spelling
break

View File

@ -8,6 +8,7 @@
from clang.cindex import SourceLocation
from clang.cindex import SourceRange
from clang.cindex import TranslationUnitSaveError
from clang.cindex import TranslationUnitLoadError
from clang.cindex import TranslationUnit
from .util import get_cursor
from .util import get_tu
@ -239,3 +240,19 @@ def test_get_tokens_gc():
del tokens
gc.collect()
gc.collect() # Just in case.
def test_fail_from_source():
path = os.path.join(kInputsDir, 'non-existent.cpp')
try:
tu = TranslationUnit.from_source(path)
except TranslationUnitLoadError:
tu = None
assert tu == None
def test_fail_from_ast_file():
path = os.path.join(kInputsDir, 'non-existent.ast')
try:
tu = TranslationUnit.from_ast_file(path)
except TranslationUnitLoadError:
tu = None
assert tu == None

View File

@ -24,6 +24,9 @@
<optional>
<ref name="USR" />
</optional>
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -73,6 +76,9 @@
<ref name="USR" />
</optional>
<!-- TODO: Add exception specification. -->
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -120,6 +126,9 @@
<optional>
<ref name="USR" />
</optional>
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -152,6 +161,9 @@
<optional>
<ref name="USR" />
</optional>
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -185,6 +197,9 @@
<optional>
<ref name="USR" />
</optional>
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -218,6 +233,9 @@
<optional>
<ref name="USR" />
</optional>
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -251,6 +269,9 @@
<optional>
<ref name="USR" />
</optional>
<optional>
<ref name="Headerfile" />
</optional>
<optional>
<ref name="Declaration" />
</optional>
@ -329,6 +350,14 @@
</element>
</define>
<define name="Headerfile">
<element name="Headerfile">
<oneOrMore>
<ref name="TextBlockContent" />
</oneOrMore>
</element>
</define>
<define name="Discussion">
<element name="Discussion">
<zeroOrMore>
@ -409,7 +438,7 @@
<define name="Availability">
<element name="Availability">
<attribute name="distribution">
<data type="string" />
<data type="string" />
</attribute>
<optional>
<element name="IntroducedInVersion">
@ -470,6 +499,30 @@
<define name="TextBlockContent">
<choice>
<element name="Para">
<optional>
<attribute name="kind">
<choice>
<value>attention</value>
<value>author</value>
<value>authors</value>
<value>bug</value>
<value>copyright</value>
<value>date</value>
<value>invariant</value>
<value>note</value>
<value>post</value>
<value>pre</value>
<value>remark</value>
<value>remarks</value>
<value>sa</value>
<value>see</value>
<value>since</value>
<value>todo</value>
<value>version</value>
<value>warning</value>
</choice>
</attribute>
</optional>
<zeroOrMore>
<ref name="TextInlineContent" />
</zeroOrMore>

View File

@ -1,171 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>AddressSanitizer, a fast memory error detector</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>AddressSanitizer</h1>
<ul>
<li> <a href="#intro">Introduction</a>
<li> <a href="#howtobuild">How to Build</a>
<li> <a href="#usage">Usage</a>
<ul><li> <a href="#has_feature">__has_feature(address_sanitizer)</a></ul>
<ul><li> <a href="#no_address_safety_analysis">
__attribute__((no_address_safety_analysis))</a></ul>
<li> <a href="#platforms">Supported Platforms</a>
<li> <a href="#limitations">Limitations</a>
<li> <a href="#status">Current Status</a>
<li> <a href="#moreinfo">More Information</a>
</ul>
<h2 id="intro">Introduction</h2>
AddressSanitizer is a fast memory error detector.
It consists of a compiler instrumentation module and a run-time library.
The tool can detect the following types of bugs:
<ul> <li> Out-of-bounds accesses to heap, stack and globals
<li> Use-after-free
<li> Use-after-return (to some extent)
<li> Double-free, invalid free
</ul>
Typical slowdown introduced by AddressSanitizer is <b>2x</b>.
<h2 id="howtobuild">How to build</h2>
Follow the <a href="../get_started.html">clang build instructions</a>.
CMake build is supported.<BR>
<h2 id="usage">Usage</h2>
Simply compile and link your program with <tt>-fsanitize=address</tt> flag. <BR>
The AddressSanitizer run-time library should be linked to the final executable,
so make sure to use <tt>clang</tt> (not <tt>ld</tt>) for the final link step.<BR>
When linking shared libraries, the AddressSanitizer run-time is not linked,
so <tt>-Wl,-z,defs</tt> may cause link errors (don't use it with AddressSanitizer). <BR>
To get a reasonable performance add <tt>-O1</tt> or higher. <BR>
To get nicer stack traces in error messages add
<tt>-fno-omit-frame-pointer</tt>. <BR>
To get perfect stack traces you may need to disable inlining (just use <tt>-O1</tt>) and tail call
elimination (<tt>-fno-optimize-sibling-calls</tt>).
<pre>
% cat example_UseAfterFree.cc
int main(int argc, char **argv) {
int *array = new int[100];
delete [] array;
return array[argc]; // BOOM
}
</pre>
<pre>
# Compile and link
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc
</pre>
OR
<pre>
# Compile
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc
# Link
% clang -g -fsanitize=address example_UseAfterFree.o
</pre>
If a bug is detected, the program will print an error message to stderr and exit with a
non-zero exit code.
Currently, AddressSanitizer does not symbolize its output, so you may need to use a
separate script to symbolize the result offline (this will be fixed in future).
<pre>
% ./a.out 2> log
% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8
READ of size 4 at 0x7f7ddab8c084 thread T0
#0 0x403c8c in main example_UseAfterFree.cc:4
#1 0x7f7ddabcac4d in __libc_start_main ??:0
0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210)
freed by thread T0 here:
#0 0x404704 in operator delete[](void*) ??:0
#1 0x403c53 in main example_UseAfterFree.cc:4
#2 0x7f7ddabcac4d in __libc_start_main ??:0
previously allocated by thread T0 here:
#0 0x404544 in operator new[](unsigned long) ??:0
#1 0x403c43 in main example_UseAfterFree.cc:2
#2 0x7f7ddabcac4d in __libc_start_main ??:0
==9442== ABORTING
</pre>
AddressSanitizer exits on the first detected error. This is by design.
One reason: it makes the generated code smaller and faster (both by ~5%).
Another reason: this makes fixing bugs unavoidable. With Valgrind, it is often
the case that users treat Valgrind warnings as false positives
(which they are not) and don't fix them.
<h3 id="has_feature">__has_feature(address_sanitizer)</h3>
In some cases one may need to execute different code depending on whether
AddressSanitizer is enabled.
<a href="LanguageExtensions.html#__has_feature_extension">__has_feature</a>
can be used for this purpose.
<pre>
#if defined(__has_feature)
# if __has_feature(address_sanitizer)
code that builds only under AddressSanitizer
# endif
#endif
</pre>
<h3 id="no_address_safety_analysis">__attribute__((no_address_safety_analysis))</h3>
Some code should not be instrumented by AddressSanitizer.
One may use the function attribute
<a href="LanguageExtensions.html#address_sanitizer">
<tt>no_address_safety_analysis</tt></a>
to disable instrumentation of a particular function.
This attribute may not be supported by other compilers, so we suggest to
use it together with <tt>__has_feature(address_sanitizer)</tt>.
Note: currently, this attribute will be lost if the function is inlined.
<h2 id="platforms">Supported Platforms</h2>
AddressSanitizer is supported on
<ul><li>Linux i386/x86_64 (tested on Ubuntu 10.04 and 12.04).
<li>MacOS 10.6, 10.7 and 10.8 (i386/x86_64).
</ul>
Support for Linux ARM (and Android ARM) is in progress
(it may work, but is not guaranteed to).
<h2 id="limitations">Limitations</h2>
<ul>
<li> AddressSanitizer uses more real memory than a native run.
Exact overhead depends on the allocations sizes. The smaller the
allocations you make the bigger the overhead is.
<li> AddressSanitizer uses more stack memory. We have seen up to 3x increase.
<li> On 64-bit platforms AddressSanitizer maps (but not reserves)
16+ Terabytes of virtual address space.
This means that tools like <tt>ulimit</tt> may not work as usually expected.
<li> Static linking is not supported.
</ul>
<h2 id="status">Current Status</h2>
AddressSanitizer is fully functional on supported platforms starting from LLVM 3.1.
The test suite is integrated into CMake build and can be run with
<tt>make check-asan</tt> command.
<h2 id="moreinfo">More Information</h2>
<a href="http://code.google.com/p/address-sanitizer/">http://code.google.com/p/address-sanitizer</a>.
</div>
</body>
</html>

163
docs/AddressSanitizer.rst Normal file
View File

@ -0,0 +1,163 @@
================
AddressSanitizer
================
.. contents::
:local:
Introduction
============
AddressSanitizer is a fast memory error detector. It consists of a compiler
instrumentation module and a run-time library. The tool can detect the
following types of bugs:
* Out-of-bounds accesses to heap, stack and globals
* Use-after-free
* Use-after-return (to some extent)
* Double-free, invalid free
Typical slowdown introduced by AddressSanitizer is **2x**.
How to build
============
Follow the `clang build instructions <../get_started.html>`_. CMake build is
supported.
Usage
=====
Simply compile and link your program with ``-fsanitize=address`` flag. The
AddressSanitizer run-time library should be linked to the final executable, so
make sure to use ``clang`` (not ``ld``) for the final link step. When linking
shared libraries, the AddressSanitizer run-time is not linked, so
``-Wl,-z,defs`` may cause link errors (don't use it with AddressSanitizer). To
get a reasonable performance add ``-O1`` or higher. To get nicer stack traces
in error messages add ``-fno-omit-frame-pointer``. To get perfect stack traces
you may need to disable inlining (just use ``-O1``) and tail call elimination
(``-fno-optimize-sibling-calls``).
.. code-block:: console
% cat example_UseAfterFree.cc
int main(int argc, char **argv) {
int *array = new int[100];
delete [] array;
return array[argc]; // BOOM
}
# Compile and link
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer example_UseAfterFree.cc
or:
.. code-block:: console
# Compile
% clang -O1 -g -fsanitize=address -fno-omit-frame-pointer -c example_UseAfterFree.cc
# Link
% clang -g -fsanitize=address example_UseAfterFree.o
If a bug is detected, the program will print an error message to stderr and
exit with a non-zero exit code. Currently, AddressSanitizer does not symbolize
its output, so you may need to use a separate script to symbolize the result
offline (this will be fixed in the future).
.. code-block:: console
% ./a.out 2> log
% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
==9442== ERROR: AddressSanitizer heap-use-after-free on address 0x7f7ddab8c084 at pc 0x403c8c bp 0x7fff87fb82d0 sp 0x7fff87fb82c8
READ of size 4 at 0x7f7ddab8c084 thread T0
#0 0x403c8c in main example_UseAfterFree.cc:4
#1 0x7f7ddabcac4d in __libc_start_main ??:0
0x7f7ddab8c084 is located 4 bytes inside of 400-byte region [0x7f7ddab8c080,0x7f7ddab8c210)
freed by thread T0 here:
#0 0x404704 in operator delete[](void*) ??:0
#1 0x403c53 in main example_UseAfterFree.cc:4
#2 0x7f7ddabcac4d in __libc_start_main ??:0
previously allocated by thread T0 here:
#0 0x404544 in operator new[](unsigned long) ??:0
#1 0x403c43 in main example_UseAfterFree.cc:2
#2 0x7f7ddabcac4d in __libc_start_main ??:0
==9442== ABORTING
AddressSanitizer exits on the first detected error. This is by design.
One reason: it makes the generated code smaller and faster (both by
~5%). Another reason: this makes fixing bugs unavoidable. With Valgrind,
it is often the case that users treat Valgrind warnings as false
positives (which they are not) and don't fix them.
``__has_feature(address_sanitizer)``
------------------------------------
In some cases one may need to execute different code depending on whether
AddressSanitizer is enabled.
:ref:`\_\_has\_feature <langext-__has_feature-__has_extension>` can be used for
this purpose.
.. code-block:: c
#if defined(__has_feature)
# if __has_feature(address_sanitizer)
// code that builds only under AddressSanitizer
# endif
#endif
``__attribute__((no_sanitize_address))``
-----------------------------------------------
Some code should not be instrumented by AddressSanitizer. One may use the
function attribute
:ref:`no_sanitize_address <langext-address_sanitizer>`
(or a deprecated synonym ``no_address_safety_analysis``)
to disable instrumentation of a particular function. This attribute may not be
supported by other compilers, so we suggest using it together with
``__has_feature(address_sanitizer)``. Note: currently, this attribute will be
lost if the function is inlined.
Initialization order checking
-----------------------------
AddressSanitizer can optionally detect dynamic initialization order problems,
when initialization of globals defined in one translation unit uses
globals defined in another translation unit. To enable this check at runtime,
you should set environment variable
``ASAN_OPTIONS=check_initialization_order=1``.
Supported Platforms
===================
AddressSanitizer is supported on
* Linux i386/x86\_64 (tested on Ubuntu 10.04 and 12.04);
* MacOS 10.6, 10.7 and 10.8 (i386/x86\_64).
Support for Linux ARM (and Android ARM) is in progress (it may work, but
is not guaranteed to).
Limitations
===========
* AddressSanitizer uses more real memory than a native run. Exact overhead
depends on the allocations sizes. The smaller the allocations you make the
bigger the overhead is.
* AddressSanitizer uses more stack memory. We have seen up to 3x increase.
* On 64-bit platforms AddressSanitizer maps (but not reserves) 16+ Terabytes of
virtual address space. This means that tools like ``ulimit`` may not work as
usually expected.
* Static linking is not supported.
Current Status
==============
AddressSanitizer is fully functional on supported platforms starting from LLVM
3.1. The test suite is integrated into CMake build and can be run with ``make
check-asan`` command.
More Information
================
`http://code.google.com/p/address-sanitizer <http://code.google.com/p/address-sanitizer/>`_

View File

@ -1,260 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Static Analyzer Design Document: Memory Regions</title>
</head>
<body>
<h1>Static Analyzer Design Document: Memory Regions</h1>
<h3>Authors</h3>
<p>Ted Kremenek, <tt>kremenek at apple</tt><br>
Zhongxing Xu, <tt>xuzhongzhing at gmail</tt></p>
<h2 id="intro">Introduction</h2>
<p>The path-sensitive analysis engine in libAnalysis employs an extensible API
for abstractly modeling the memory of an analyzed program. This API employs the
concept of "memory regions" to abstractly model chunks of program memory such as
program variables and dynamically allocated memory such as those returned from
'malloc' and 'alloca'. Regions are hierarchical, with subregions modeling
subtyping relationships, field and array offsets into larger chunks of memory,
and so on.</p>
<p>The region API consists of two components:</p>
<ul> <li>A taxonomy and representation of regions themselves within the analyzer
engine. The primary definitions and interfaces are described in <tt><a
href="http://clang.llvm.org/doxygen/MemRegion_8h-source.html">MemRegion.h</a></tt>.
At the root of the region hierarchy is the class <tt>MemRegion</tt> with
specific subclasses refining the region concept for variables, heap allocated
memory, and so forth.</li> <li>The modeling of binding of values to regions. For
example, modeling the value stored to a local variable <tt>x</tt> consists of
recording the binding between the region for <tt>x</tt> (which represents the
raw memory associated with <tt>x</tt>) and the value stored to <tt>x</tt>. This
binding relationship is captured with the notion of &quot;symbolic
stores.&quot;</li> </ul>
<p>Symbolic stores, which can be thought of as representing the relation
<tt>regions -> values</tt>, are implemented by subclasses of the
<tt>StoreManager</tt> class (<tt><a
href="http://clang.llvm.org/doxygen/Store_8h-source.html">Store.h</a></tt>). A
particular StoreManager implementation has complete flexibility concerning the
following:
<ul>
<li><em>How</em> to model the binding between regions and values</li>
<li><em>What</em> bindings are recorded
</ul>
<p>Together, both points allow different StoreManagers to tradeoff between
different levels of analysis precision and scalability concerning the reasoning
of program memory. Meanwhile, the core path-sensitive engine makes no
assumptions about either point, and queries a StoreManager about the bindings
to a memory region through a generic interface that all StoreManagers share. If
a particular StoreManager cannot reason about the potential bindings of a given
memory region (e.g., '<tt>BasicStoreManager</tt>' does not reason about fields
of structures) then the StoreManager can simply return 'unknown' (represented by
'<tt>UnknownVal</tt>') for a particular region-binding. This separation of
concerns not only isolates the core analysis engine from the details of
reasoning about program memory but also facilitates the option of a client of the
path-sensitive engine to easily swap in different StoreManager implementations
that internally reason about program memory in very different ways.</p>
<p>The rest of this document is divided into two parts. We first discuss region
taxonomy and the semantics of regions. We then discuss the StoreManager
interface, and details of how the currently available StoreManager classes
implement region bindings.</p>
<h2 id="regions">Memory Regions and Region Taxonomy</h2>
<h3>Pointers</h3>
<p>Before discussing memory regions, we first discuss pointers,
since memory regions are essentially used to represent pointer values.</p>
<p>A pointer is a kind of value. Pointer values have two semantic aspects.
One is its physical value, which is an address or location. The other is the
type of the memory object residing in the address.</p>
<p>Memory regions are designed to abstract these two properties of the pointer.
The physical value of a pointer is represented by MemRegion pointers. The rvalue
type of the region corresponds to the type of the pointee object.</p>
<p>One complication is that we could have different view regions on the same
memory chunk. They represent the same memory location, but have different
abstract location, i.e., MemRegion pointers. Thus we need to canonicalize the
abstract locations to get a unique abstract location for one physical
location.</p>
<p>Furthermore, these different view regions may or may not represent memory
objects of different types. Some different types are semantically the same,
for example, 'struct s' and 'my_type' are the same type.</p>
<pre>
struct s;
typedef struct s my_type;
</pre>
<p>But <tt>char</tt> and <tt>int</tt> are not the same type in the code below:</p>
<pre>
void *p;
int *q = (int*) p;
char *r = (char*) p;
</pre>
<p>Thus we need to canonicalize the MemRegion which is used in binding and
retrieving.</p>
<h3>Regions</h3>
<p>Region is the entity used to model pointer values. A Region has the following
properties:</p>
<ul>
<li>Kind</li>
<li>ObjectType: the type of the object residing on the region.</li>
<li>LocationType: the type of the pointer value that the region corresponds to.
Usually this is the pointer to the ObjectType. But sometimes we want to cache
this type explicitly, for example, for a CodeTextRegion.</li>
<li>StartLocation</li>
<li>EndLocation</li>
</ul>
<h3>Symbolic Regions</h3>
<p>A symbolic region is a map of the concept of symbolic values into the domain
of regions. It is the way that we represent symbolic pointers. Whenever a
symbolic pointer value is needed, a symbolic region is created to represent
it.</p>
<p>A symbolic region has no type. It wraps a SymbolData. But sometimes we have
type information associated with a symbolic region. For this case, a
TypedViewRegion is created to layer the type information on top of the symbolic
region. The reason we do not carry type information with the symbolic region is
that the symbolic regions can have no type. To be consistent, we don't let them
carry type information.</p>
<p>Like a symbolic pointer, a symbolic region may be NULL, has unknown extent,
and represents a generic chunk of memory.</p>
<p><em><b>NOTE</b>: We plan not to use loc::SymbolVal in RegionStore and remove it
gradually.</em></p>
<p>Symbolic regions get their rvalue types through the following ways:</p>
<ul>
<li>Through the parameter or global variable that points to it, e.g.:
<pre>
void f(struct s* p) {
...
}
</pre>
<p>The symbolic region pointed to by <tt>p</tt> has type <tt>struct
s</tt>.</p></li>
<li>Through explicit or implicit casts, e.g.:
<pre>
void f(void* p) {
struct s* q = (struct s*) p;
...
}
</pre>
</li>
</ul>
<p>We attach the type information to the symbolic region lazily. For the first
case above, we create the <tt>TypedViewRegion</tt> only when the pointer is
actually used to access the pointee memory object, that is when the element or
field region is created. For the cast case, the <tt>TypedViewRegion</tt> is
created when visiting the <tt>CastExpr</tt>.</p>
<p>The reason for doing lazy typing is that symbolic regions are sometimes only
used to do location comparison.</p>
<h3>Pointer Casts</h3>
<p>Pointer casts allow people to impose different 'views' onto a chunk of
memory.</p>
<p>Usually we have two kinds of casts. One kind casts down within the
type hierarchy. It imposes more specific views onto more generic memory regions.
The other kind casts up within the type hierarchy. It strips away more
specific views on top of the more generic memory regions.</p>
<p>We simulate the down casts by layering another <tt>TypedViewRegion</tt> on
top of the original region. We simulate the up casts by stripping away the top
<tt>TypedViewRegion</tt>. Down casts are usually simple. For up casts, if
there is no <tt>TypedViewRegion</tt> to be stripped, we return the original
region. If the underlying region is of a different type than the cast-to type,
we flag an error state.</p>
<p>For toll-free bridging casts, we return the original region.</p>
<p>We can set up a partial order for pointer types, with the most general type
<tt>void*</tt> at the top. The partial order forms a tree with <tt>void*</tt> as
its root node.</p>
<p>Every <tt>MemRegion</tt> has a root position in the type tree. For example,
the pointee region of <tt>void *p</tt> has its root position at the root node of
the tree. <tt>VarRegion</tt> of <tt>int x</tt> has its root position at the 'int
type' node.</p>
<p><tt>TypedViewRegion</tt> is used to move the region down or up in the tree.
Moving down in the tree adds a <tt>TypedViewRegion</tt>. Moving up in the tree
removes a <tt>TypedViewRegion</tt>.</p>
<p>Do we want to allow moving up beyond the root position? This happens
when:</p> <pre> int x; void *p = &amp;x; </pre>
<p>The region of <tt>x</tt> has its root position at the 'int*' node. The cast to
void* moves that region up to the 'void*' node. I propose to not allow such
casts, and assign the region of <tt>x</tt> for <tt>p</tt>.</p>
<p>Another non-ideal case is that people might cast to a non-generic pointer
from another non-generic pointer instead of first casting it back to the generic
pointer. Direct handling of this case would result in multiple layers of
TypedViewRegions. This enforces an incorrect semantic view to the region,
because we can only have one typed view on a region at a time. To avoid this
inconsistency, before casting the region, we strip the TypedViewRegion, then do
the cast. In summary, we only allow one layer of TypedViewRegion.</p>
<h3>Region Bindings</h3>
<p>The following region kinds are boundable: VarRegion, CompoundLiteralRegion,
StringRegion, ElementRegion, FieldRegion, and ObjCIvarRegion.</p>
<p>When binding regions, we perform canonicalization on element regions and field
regions. This is because we can have different views on the same region, some
of which are essentially the same view with different sugar type names.</p>
<p>To canonicalize a region, we get the canonical types for all TypedViewRegions
along the way up to the root region, and make new TypedViewRegions with those
canonical types.</p>
<p>For Objective-C and C++, perhaps another canonicalization rule should be
added: for FieldRegion, the least derived class that has the field is used as
the type of the super region of the FieldRegion.</p>
<p>All bindings and retrievals are done on the canonicalized regions.</p>
<p>Canonicalization is transparent outside the region store manager, and more
specifically, invisible outside the Bind() and Retrieve() methods. We don't need to
consider region canonicalization when doing pointer casts.</p>
<h3>Constraint Manager</h3>
<p>The constraint manager reasons about the abstract location of memory objects.
We can have different views on a region, but none of these views changes the
location of that object. Thus we should get the same abstract location for those
regions.</p>
</body>
</html>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

935
docs/Block-ABI-Apple.rst Normal file
View File

@ -0,0 +1,935 @@
==================================
Block Implementation Specification
==================================
.. contents::
:local:
History
=======
* 2008/7/14 - created.
* 2008/8/21 - revised, C++.
* 2008/9/24 - add ``NULL`` ``isa`` field to ``__block`` storage.
* 2008/10/1 - revise block layout to use a ``static`` descriptor structure.
* 2008/10/6 - revise block layout to use an unsigned long int flags.
* 2008/10/28 - specify use of ``_Block_object_assign`` and
``_Block_object_dispose`` for all "Object" types in helper functions.
* 2008/10/30 - revise new layout to have invoke function in same place.
* 2008/10/30 - add ``__weak`` support.
* 2010/3/16 - rev for stret return, signature field.
* 2010/4/6 - improved wording.
* 2013/1/6 - improved wording and converted to rst.
This document describes the Apple ABI implementation specification of Blocks.
The first shipping version of this ABI is found in Mac OS X 10.6, and shall be
referred to as 10.6.ABI. As of 2010/3/16, the following describes the ABI
contract with the runtime and the compiler, and, as necessary, will be referred
to as ABI.2010.3.16.
Since the Apple ABI references symbols from other elements of the system, any
attempt to use this ABI on systems prior to SnowLeopard is undefined.
High Level
==========
The ABI of ``Blocks`` consists of their layout and the runtime functions required
by the compiler. A ``Block`` consists of a structure of the following form:
.. code-block:: c
struct Block_literal_1 {
void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock
int flags;
int reserved;
void (*invoke)(void *, ...);
struct Block_descriptor_1 {
unsigned long int reserved; // NULL
unsigned long int size; // sizeof(struct Block_literal_1)
// optional helper functions
void (*copy_helper)(void *dst, void *src); // IFF (1<<25)
void (*dispose_helper)(void *src); // IFF (1<<25)
// required ABI.2010.3.16
const char *signature; // IFF (1<<30)
} *descriptor;
// imported variables
};
The following flags bits are in use thusly for a possible ABI.2010.3.16:
.. code-block:: c
enum {
BLOCK_HAS_COPY_DISPOSE = (1 << 25),
BLOCK_HAS_CTOR = (1 << 26), // helpers have C++ code
BLOCK_IS_GLOBAL = (1 << 28),
BLOCK_HAS_STRET = (1 << 29), // IFF BLOCK_HAS_SIGNATURE
BLOCK_HAS_SIGNATURE = (1 << 30),
};
In 10.6.ABI the (1<<29) was usually set and was always ignored by the runtime -
it had been a transitional marker that did not get deleted after the
transition. This bit is now paired with (1<<30), and represented as the pair
(3<<29), for the following combinations of valid bit settings, and their
meanings:
.. code-block:: c
switch (flags & (3<<29)) {
case (0<<29): 10.6.ABI, no signature field available
case (1<<29): 10.6.ABI, no signature field available
case (2<<29): ABI.2010.3.16, regular calling convention, presence of signature field
case (3<<29): ABI.2010.3.16, stret calling convention, presence of signature field,
}
The signature field is not always populated.
The following discussions are presented as 10.6.ABI otherwise.
``Block`` literals may occur within functions where the structure is created in
stack local memory. They may also appear as initialization expressions for
``Block`` variables of global or ``static`` local variables.
When a ``Block`` literal expression is evaluated the stack based structure is
initialized as follows:
1. A ``static`` descriptor structure is declared and initialized as follows:
a. The ``invoke`` function pointer is set to a function that takes the
``Block`` structure as its first argument and the rest of the arguments (if
any) to the ``Block`` and executes the ``Block`` compound statement.
b. The ``size`` field is set to the size of the following ``Block`` literal
structure.
c. The ``copy_helper`` and ``dispose_helper`` function pointers are set to
respective helper functions if they are required by the ``Block`` literal.
2. A stack (or global) ``Block`` literal data structure is created and
initialized as follows:
a. The ``isa`` field is set to the address of the external
``_NSConcreteStackBlock``, which is a block of uninitialized memory supplied
in ``libSystem``, or ``_NSConcreteGlobalBlock`` if this is a static or file
level ``Block`` literal.
b. The ``flags`` field is set to zero unless there are variables imported
into the ``Block`` that need helper functions for program level
``Block_copy()`` and ``Block_release()`` operations, in which case the
(1<<25) flags bit is set.
As an example, the ``Block`` literal expression:
.. code-block:: c
^ { printf("hello world\n"); }
would cause the following to be created on a 32-bit system:
.. code-block:: c
struct __block_literal_1 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_1 *);
struct __block_descriptor_1 *descriptor;
};
void __block_invoke_1(struct __block_literal_1 *_block) {
printf("hello world\n");
}
static struct __block_descriptor_1 {
unsigned long int reserved;
unsigned long int Block_size;
} __block_descriptor_1 = { 0, sizeof(struct __block_literal_1) };
and where the ``Block`` literal itself appears:
.. code-block:: c
struct __block_literal_1 _block_literal = {
&_NSConcreteStackBlock,
(1<<29), <uninitialized>,
__block_invoke_1,
&__block_descriptor_1
};
A ``Block`` imports other ``Block`` references, ``const`` copies of other
variables, and variables marked ``__block``. In Objective-C, variables may
additionally be objects.
When a ``Block`` literal expression is used as the initial value of a global
or ``static`` local variable, it is initialized as follows:
.. code-block:: c
struct __block_literal_1 __block_literal_1 = {
&_NSConcreteGlobalBlock,
(1<<28)|(1<<29), <uninitialized>,
__block_invoke_1,
&__block_descriptor_1
};
that is, a different address is provided as the first value and a particular
(1<<28) bit is set in the ``flags`` field, and otherwise it is the same as for
stack based ``Block`` literals. This is an optimization that can be used for
any ``Block`` literal that imports no ``const`` or ``__block`` storage
variables.
Imported Variables
==================
Variables of ``auto`` storage class are imported as ``const`` copies. Variables
of ``__block`` storage class are imported as a pointer to an enclosing data
structure. Global variables are simply referenced and not considered as
imported.
Imported ``const`` copy variables
---------------------------------
Automatic storage variables not marked with ``__block`` are imported as
``const`` copies.
The simplest example is that of importing a variable of type ``int``:
.. code-block:: c
int x = 10;
void (^vv)(void) = ^{ printf("x is %d\n", x); }
x = 11;
vv();
which would be compiled to:
.. code-block:: c
struct __block_literal_2 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_2 *);
struct __block_descriptor_2 *descriptor;
const int x;
};
void __block_invoke_2(struct __block_literal_2 *_block) {
printf("x is %d\n", _block->x);
}
static struct __block_descriptor_2 {
unsigned long int reserved;
unsigned long int Block_size;
} __block_descriptor_2 = { 0, sizeof(struct __block_literal_2) };
and:
.. code-block:: c
struct __block_literal_2 __block_literal_2 = {
&_NSConcreteStackBlock,
(1<<29), <uninitialized>,
__block_invoke_2,
&__block_descriptor_2,
x
};
In summary, scalars, structures, unions, and function pointers are generally
imported as ``const`` copies with no need for helper functions.
Imported ``const`` copy of ``Block`` reference
----------------------------------------------
The first case where copy and dispose helper functions are required is for the
case of when a ``Block`` itself is imported. In this case both a
``copy_helper`` function and a ``dispose_helper`` function are needed. The
``copy_helper`` function is passed both the existing stack based pointer and the
pointer to the new heap version and should call back into the runtime to
actually do the copy operation on the imported fields within the ``Block``. The
runtime functions are all described in :ref:`RuntimeHelperFunctions`.
A quick example:
.. code-block:: c
void (^existingBlock)(void) = ...;
void (^vv)(void) = ^{ existingBlock(); }
vv();
struct __block_literal_3 {
...; // existing block
};
struct __block_literal_4 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_4 *);
struct __block_literal_3 *const existingBlock;
};
void __block_invoke_4(struct __block_literal_4 *_block) {
_block->existingBlock->invoke(_block->existingBlock);
}
void __block_copy_4(struct __block_literal_4 *dst, struct __block_literal_4 *src) {
//_Block_copy_assign(&dst->existingBlock, src->existingBlock, 0);
_Block_object_assign(&dst->existingBlock, src->existingBlock, BLOCK_FIELD_IS_BLOCK);
}
void __block_dispose_4(struct __block_literal_4 *src) {
// was _Block_destroy
_Block_object_dispose(src->existingBlock, BLOCK_FIELD_IS_BLOCK);
}
static struct __block_descriptor_4 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_4 *dst, struct __block_literal_4 *src);
void (*dispose_helper)(struct __block_literal_4 *);
} __block_descriptor_4 = {
0,
sizeof(struct __block_literal_4),
__block_copy_4,
__block_dispose_4,
};
and where said ``Block`` is used:
.. code-block:: c
struct __block_literal_4 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<29), <uninitialized>,
__block_invoke_4,
&__block_descriptor_4,
existingBlock,
};
Importing ``__attribute__((NSObject))`` variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
GCC introduces ``__attribute__((NSObject))`` on structure pointers to mean "this
is an object". This is useful because many low level data structures are
declared as opaque structure pointers, e.g. ``CFStringRef``, ``CFArrayRef``,
etc. When used from C, however, these are still really objects and are the
second case that requires copy and dispose helper functions to be
generated. The copy helper functions generated by the compiler should use the
``_Block_object_assign`` runtime helper function and in the dispose helper the
``_Block_object_dispose`` runtime helper function should be called.
For example, ``Block`` foo in the following:
.. code-block:: c
struct Opaque *__attribute__((NSObject)) objectPointer = ...;
...
void (^foo)(void) = ^{ CFPrint(objectPointer); };
would have the following helper functions generated:
.. code-block:: c
void __block_copy_foo(struct __block_literal_5 *dst, struct __block_literal_5 *src) {
_Block_object_assign(&dst->objectPointer, src-> objectPointer, BLOCK_FIELD_IS_OBJECT);
}
void __block_dispose_foo(struct __block_literal_5 *src) {
_Block_object_dispose(src->objectPointer, BLOCK_FIELD_IS_OBJECT);
}
Imported ``__block`` marked variables
-------------------------------------
Layout of ``__block`` marked variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The compiler must embed variables that are marked ``__block`` in a specialized
structure of the form:
.. code-block:: c
struct _block_byref_foo {
void *isa;
struct Block_byref *forwarding;
int flags; //refcount;
int size;
typeof(marked_variable) marked_variable;
};
Variables of certain types require helper functions for when ``Block_copy()``
and ``Block_release()`` are performed upon a referencing ``Block``. At the "C"
level only variables that are of type ``Block`` or ones that have
``__attribute__((NSObject))`` marked require helper functions. In Objective-C
objects require helper functions and in C++ stack based objects require helper
functions. Variables that require helper functions use the form:
.. code-block:: c
struct _block_byref_foo {
void *isa;
struct _block_byref_foo *forwarding;
int flags; //refcount;
int size;
// helper functions called via Block_copy() and Block_release()
void (*byref_keep)(void *dst, void *src);
void (*byref_dispose)(void *);
typeof(marked_variable) marked_variable;
};
The structure is initialized such that:
a. The ``forwarding`` pointer is set to the beginning of its enclosing
structure.
b. The ``size`` field is initialized to the total size of the enclosing
structure.
c. The ``flags`` field is set to either 0 if no helper functions are needed
or (1<<25) if they are.
d. The helper functions are initialized (if present).
e. The variable itself is set to its initial value.
f. The ``isa`` field is set to ``NULL``.
Access to ``__block`` variables from within its lexical scope
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In order to "move" the variable to the heap upon a ``copy_helper`` operation the
compiler must rewrite access to such a variable to be indirect through the
structure's ``forwarding`` pointer. For example:
.. code-block:: c
int __block i = 10;
i = 11;
would be rewritten to be:
.. code-block:: c
struct _block_byref_i {
void *isa;
struct _block_byref_i *forwarding;
int flags; //refcount;
int size;
int captured_i;
} i = { NULL, &i, 0, sizeof(struct _block_byref_i), 10 };
i.forwarding->captured_i = 11;
In the case of a ``Block`` reference variable being marked ``__block`` the
helper code generated must use the ``_Block_object_assign`` and
``_Block_object_dispose`` routines supplied by the runtime to make the
copies. For example:
.. code-block:: c
__block void (^voidBlock)(void) = blockA;
voidBlock = blockB;
would translate into:
.. code-block:: c
struct _block_byref_voidBlock {
void *isa;
struct _block_byref_voidBlock *forwarding;
int flags; //refcount;
int size;
void (*byref_keep)(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src);
void (*byref_dispose)(struct _block_byref_voidBlock *);
void (^captured_voidBlock)(void);
};
void _block_byref_keep_helper(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) {
//_Block_copy_assign(&dst->captured_voidBlock, src->captured_voidBlock, 0);
_Block_object_assign(&dst->captured_voidBlock, src->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER);
}
void _block_byref_dispose_helper(struct _block_byref_voidBlock *param) {
//_Block_destroy(param->captured_voidBlock, 0);
_Block_object_dispose(param->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER);
}
and:
.. code-block:: c
struct _block_byref_voidBlock voidBlock = {( .forwarding=&voidBlock, .flags=(1<<25), .size=sizeof(struct _block_byref_voidBlock),
.byref_keep=_block_byref_keep_helper, .byref_dispose=_block_byref_dispose_helper,
.captured_voidBlock=blockA )};
voidBlock.forwarding->captured_voidBlock = blockB;
Importing ``__block`` variables into ``Blocks``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A ``Block`` that uses a ``__block`` variable in its compound statement body must
import the variable and emit ``copy_helper`` and ``dispose_helper`` helper
functions that, in turn, call back into the runtime to actually copy or release
the ``byref`` data block using the functions ``_Block_object_assign`` and
``_Block_object_dispose``.
For example:
.. code-block:: c
int __block i = 2;
functioncall(^{ i = 10; });
would translate to:
.. code-block:: c
struct _block_byref_i {
void *isa; // set to NULL
struct _block_byref_i *forwarding;
int flags; //refcount;
int size;
void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src);
void (*byref_dispose)(struct _block_byref_i *);
int captured_i;
};
struct __block_literal_5 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_5 *);
struct __block_descriptor_5 *descriptor;
struct _block_byref_i *i_holder;
};
void __block_invoke_5(struct __block_literal_5 *_block) {
_block->i_holder->forwarding->captured_i = 10;
}
void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) {
//_Block_byref_assign_copy(&dst->captured_i, src->captured_i);
_Block_object_assign(&dst->captured_i, src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER);
}
void __block_dispose_5(struct __block_literal_5 *src) {
//_Block_byref_release(src->captured_i);
_Block_object_dispose(src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER);
}
static struct __block_descriptor_5 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src);
void (*dispose_helper)(struct __block_literal_5 *);
} __block_descriptor_5 = { 0, sizeof(struct __block_literal_5), __block_copy_5, __block_dispose_5 };
and:
.. code-block:: c
struct _block_byref_i i = {( .forwarding=&i, .flags=0, .size=sizeof(struct _block_byref_i) )};
struct __block_literal_5 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<29), <uninitialized>,
__block_invoke_5,
&__block_descriptor_5,
2,
};
Importing ``__attribute__((NSObject))`` ``__block`` variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
A ``__block`` variable that is also marked ``__attribute__((NSObject))`` should
have ``byref_keep`` and ``byref_dispose`` helper functions that use
``_Block_object_assign`` and ``_Block_object_dispose``.
``__block`` escapes
^^^^^^^^^^^^^^^^^^^
Because ``Blocks`` referencing ``__block`` variables may have ``Block_copy()``
performed upon them the underlying storage for the variables may move to the
heap. In Objective-C Garbage Collection Only compilation environments the heap
used is the garbage collected one and no further action is required. Otherwise
the compiler must issue a call to potentially release any heap storage for
``__block`` variables at all escapes or terminations of their scope. The call
should be:
.. code-block:: c
_Block_object_dispose(&_block_byref_foo, BLOCK_FIELD_IS_BYREF);
Nesting
^^^^^^^
``Blocks`` may contain ``Block`` literal expressions. Any variables used within
inner blocks are imported into all enclosing ``Block`` scopes even if the
variables are not used. This includes ``const`` imports as well as ``__block``
variables.
Objective C Extensions to ``Blocks``
====================================
Importing Objects
-----------------
Objects should be treated as ``__attribute__((NSObject))`` variables; all
``copy_helper``, ``dispose_helper``, ``byref_keep``, and ``byref_dispose``
helper functions should use ``_Block_object_assign`` and
``_Block_object_dispose``. There should be no code generated that uses
``*-retain`` or ``*-release`` methods.
``Blocks`` as Objects
---------------------
The compiler will treat ``Blocks`` as objects when synthesizing property setters
and getters, will characterize them as objects when generating garbage
collection strong and weak layout information in the same manner as objects, and
will issue strong and weak write-barrier assignments in the same manner as
objects.
``__weak __block`` Support
--------------------------
Objective-C (and Objective-C++) support the ``__weak`` attribute on ``__block``
variables. Under normal circumstances the compiler uses the Objective-C runtime
helper support functions ``objc_assign_weak`` and ``objc_read_weak``. Both
should continue to be used for all reads and writes of ``__weak __block``
variables:
.. code-block:: c
objc_read_weak(&block->byref_i->forwarding->i)
The ``__weak`` variable is stored in a ``_block_byref_foo`` structure and the
``Block`` has copy and dispose helpers for this structure that call:
.. code-block:: c
_Block_object_assign(&dest->_block_byref_i, src-> _block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF);
and:
.. code-block:: c
_Block_object_dispose(src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF);
In turn, the ``block_byref`` copy support helpers distinguish between whether
the ``__block`` variable is a ``Block`` or not and should either call:
.. code-block:: c
_Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_OBJECT | BLOCK_BYREF_CALLER);
for something declared as an object or:
.. code-block:: c
_Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER);
for something declared as a ``Block``.
A full example follows:
.. code-block:: c
__block __weak id obj = <initialization expression>;
functioncall(^{ [obj somemessage]; });
would translate to:
.. code-block:: c
struct _block_byref_obj {
void *isa; // uninitialized
struct _block_byref_obj *forwarding;
int flags; //refcount;
int size;
void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src);
void (*byref_dispose)(struct _block_byref_i *);
id captured_obj;
};
void _block_byref_obj_keep(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) {
//_Block_copy_assign(&dst->captured_obj, src->captured_obj, 0);
_Block_object_assign(&dst->captured_obj, src->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER);
}
void _block_byref_obj_dispose(struct _block_byref_voidBlock *param) {
//_Block_destroy(param->captured_obj, 0);
_Block_object_dispose(param->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER);
};
for the block ``byref`` part and:
.. code-block:: c
struct __block_literal_5 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_5 *);
struct __block_descriptor_5 *descriptor;
struct _block_byref_obj *byref_obj;
};
void __block_invoke_5(struct __block_literal_5 *_block) {
[objc_read_weak(&_block->byref_obj->forwarding->captured_obj) somemessage];
}
void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) {
//_Block_byref_assign_copy(&dst->byref_obj, src->byref_obj);
_Block_object_assign(&dst->byref_obj, src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK);
}
void __block_dispose_5(struct __block_literal_5 *src) {
//_Block_byref_release(src->byref_obj);
_Block_object_dispose(src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK);
}
static struct __block_descriptor_5 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src);
void (*dispose_helper)(struct __block_literal_5 *);
} __block_descriptor_5 = { 0, sizeof(struct __block_literal_5), __block_copy_5, __block_dispose_5 };
and within the compound statement:
.. code-block:: c
struct _block_byref_obj obj = {( .forwarding=&obj, .flags=(1<<25), .size=sizeof(struct _block_byref_obj),
.byref_keep=_block_byref_obj_keep, .byref_dispose=_block_byref_obj_dispose,
.captured_obj = <initialization expression> )};
struct __block_literal_5 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<29), <uninitialized>,
__block_invoke_5,
&__block_descriptor_5,
&obj, // a reference to the on-stack structure containing "captured_obj"
};
functioncall(_block_literal->invoke(&_block_literal));
C++ Support
===========
Within a block stack based C++ objects are copied into ``const`` copies using
the copy constructor. It is an error if a stack based C++ object is used within
a block if it does not have a copy constructor. In addition both copy and
destroy helper routines must be synthesized for the block to support the
``Block_copy()`` operation, and the flags word is marked with the (1<<26) bit in
addition to the (1<<25) bit. The copy helper should call the constructor using
appropriate offsets of the variable within the supplied stack based block source
and heap based destination for all ``const`` constructed copies, and similarly
should call the destructor in the destroy routine.
As an example, suppose a C++ class ``FOO`` existed with a copy constructor.
Within a code block a stack version of a ``FOO`` object is declared and used
within a ``Block`` literal expression:
.. code-block:: c++
{
FOO foo;
void (^block)(void) = ^{ printf("%d\n", foo.value()); };
}
The compiler would synthesize:
.. code-block:: c++
struct __block_literal_10 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_10 *);
struct __block_descriptor_10 *descriptor;
const FOO foo;
};
void __block_invoke_10(struct __block_literal_10 *_block) {
printf("%d\n", _block->foo.value());
}
void __block_copy_10(struct __block_literal_10 *dst, struct __block_literal_10 *src) {
FOO_ctor(&dst->foo, &src->foo);
}
void __block_dispose_10(struct __block_literal_10 *src) {
FOO_dtor(&src->foo);
}
static struct __block_descriptor_10 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_10 *dst, struct __block_literal_10 *src);
void (*dispose_helper)(struct __block_literal_10 *);
} __block_descriptor_10 = { 0, sizeof(struct __block_literal_10), __block_copy_10, __block_dispose_10 };
and the code would be:
.. code-block:: c++
{
FOO foo;
comp_ctor(&foo); // default constructor
struct __block_literal_10 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<26)|(1<<29), <uninitialized>,
__block_invoke_10,
&__block_descriptor_10,
};
comp_ctor(&_block_literal->foo, &foo); // const copy into stack version
struct __block_literal_10 *block = &_block_literal; // assign literal to block variable
block->invoke(block); // invoke block
comp_dtor(&_block_literal->foo); // destroy stack version of const block copy
comp_dtor(&foo); // destroy original version
}
C++ objects stored in ``__block`` storage start out on the stack in a
``block_byref`` data structure as do other variables. Such objects (if not
``const`` objects) must support a regular copy constructor. The ``block_byref``
data structure will have copy and destroy helper routines synthesized by the
compiler. The copy helper will have code created to perform the copy
constructor based on the initial stack ``block_byref`` data structure, and will
also set the (1<<26) bit in addition to the (1<<25) bit. The destroy helper
will have code to do the destructor on the object stored within the supplied
``block_byref`` heap data structure. For example,
.. code-block:: c++
__block FOO blockStorageFoo;
requires the normal constructor for the embedded ``blockStorageFoo`` object:
.. code-block:: c++
FOO_ctor(& _block_byref_blockStorageFoo->blockStorageFoo);
and at scope termination the destructor:
.. code-block:: c++
FOO_dtor(& _block_byref_blockStorageFoo->blockStorageFoo);
Note that the forwarding indirection is *NOT* used.
The compiler would need to generate (if used from a block literal) the following
copy/dispose helpers:
.. code-block:: c++
void _block_byref_obj_keep(struct _block_byref_blockStorageFoo *dst, struct _block_byref_blockStorageFoo *src) {
FOO_ctor(&dst->blockStorageFoo, &src->blockStorageFoo);
}
void _block_byref_obj_dispose(struct _block_byref_blockStorageFoo *src) {
FOO_dtor(&src->blockStorageFoo);
}
for the appropriately named constructor and destructor for the class/struct
``FOO``.
To support member variable and function access the compiler will synthesize a
``const`` pointer to a block version of the ``this`` pointer.
.. _RuntimeHelperFunctions:
Runtime Helper Functions
========================
The runtime helper functions are described in
``/usr/local/include/Block_private.h``. To summarize their use, a ``Block``
requires copy/dispose helpers if it imports any block variables, ``__block``
storage variables, ``__attribute__((NSObject))`` variables, or C++ ``const``
copied objects with constructor/destructors. The (1<<25) bit is set and
functions are generated.
The block copy helper function should, for each of the variables of the type
mentioned above, call:
.. code-block:: c
_Block_object_assign(&dst->target, src->target, BLOCK_FIELD_<appropo>);
in the copy helper and:
.. code-block:: c
_Block_object_dispose(src->target, BLOCK_FIELD_<appropo>);
in the dispose helper where ``<appropo>`` is:
.. code-block:: c
enum {
BLOCK_FIELD_IS_OBJECT = 3, // id, NSObject, __attribute__((NSObject)), block, ...
BLOCK_FIELD_IS_BLOCK = 7, // a block variable
BLOCK_FIELD_IS_BYREF = 8, // the on stack structure holding the __block variable
BLOCK_FIELD_IS_WEAK = 16, // declared __weak
BLOCK_BYREF_CALLER = 128, // called from byref copy/dispose helpers
};
and of course the constructors/destructors for ``const`` copied C++ objects.
The ``block_byref`` data structure similarly requires copy/dispose helpers for
block variables, ``__attribute__((NSObject))`` variables, or C++ ``const``
copied objects with constructor/destructors, and again the (1<<25) bit is set
and functions are generated in the same manner.
Under ObjC we allow ``__weak`` as an attribute on ``__block`` variables, and
this causes the addition of ``BLOCK_FIELD_IS_WEAK`` orred onto the
``BLOCK_FIELD_IS_BYREF`` flag when copying the ``block_byref`` structure in the
``Block`` copy helper, and onto the ``BLOCK_FIELD_<appropo>`` field within the
``block_byref`` copy/dispose helper calls.
The prototypes, and summary, of the helper functions are:
.. code-block:: c
/* Certain field types require runtime assistance when being copied to the
heap. The following function is used to copy fields of types: blocks,
pointers to byref structures, and objects (including
__attribute__((NSObject)) pointers). BLOCK_FIELD_IS_WEAK is orthogonal to
the other choices which are mutually exclusive. Only in a Block copy
helper will one see BLOCK_FIELD_IS_BYREF.
*/
void _Block_object_assign(void *destAddr, const void *object, const int flags);
/* Similarly a compiler generated dispose helper needs to call back for each
field of the byref data structure. (Currently the implementation only
packs one field into the byref structure but in principle there could be
more). The same flags used in the copy helper should be used for each
call generated to this function:
*/
void _Block_object_dispose(const void *object, const int flags);
Copyright
=========
Copyright 2008-2010 Apple, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -1,669 +1 @@
Block Implementation Specification
Copyright 2008-2010 Apple, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
0. History
2008/7/14 - created
2008/8/21 - revised, C++
2008/9/24 - add NULL isa field to __block storage
2008/10/1 - revise block layout to use a static descriptor structure
2008/10/6 - revise block layout to use an unsigned long int flags
2008/10/28 - specify use of _Block_object_assign/dispose for all "Object" types in helper functions
2008/10/30 - revise new layout to have invoke function in same place
2008/10/30 - add __weak support
2010/3/16 - rev for stret return, signature field
2010/4/6 - improved wording
This document describes the Apple ABI implementation specification of Blocks.
The first shipping version of this ABI is found in Mac OS X 10.6, and shall be referred to as 10.6.ABI. As of 2010/3/16, the following describes the ABI contract with the runtime and the compiler, and, as necessary, will be referred to as ABI.2010.3.16.
Since the Apple ABI references symbols from other elements of the system, any attempt to use this ABI on systems prior to SnowLeopard is undefined.
1. High Level
The ABI of blocks consists of their layout and the runtime functions required by the compiler.
A Block consists of a structure of the following form:
struct Block_literal_1 {
void *isa; // initialized to &_NSConcreteStackBlock or &_NSConcreteGlobalBlock
int flags;
int reserved;
void (*invoke)(void *, ...);
struct Block_descriptor_1 {
unsigned long int reserved; // NULL
unsigned long int size; // sizeof(struct Block_literal_1)
// optional helper functions
void (*copy_helper)(void *dst, void *src); // IFF (1<<25)
void (*dispose_helper)(void *src); // IFF (1<<25)
// required ABI.2010.3.16
const char *signature; // IFF (1<<30)
} *descriptor;
// imported variables
};
The following flags bits are in use thusly for a possible ABI.2010.3.16:
enum {
BLOCK_HAS_COPY_DISPOSE = (1 << 25),
BLOCK_HAS_CTOR = (1 << 26), // helpers have C++ code
BLOCK_IS_GLOBAL = (1 << 28),
BLOCK_HAS_STRET = (1 << 29), // IFF BLOCK_HAS_SIGNATURE
BLOCK_HAS_SIGNATURE = (1 << 30),
};
In 10.6.ABI the (1<<29) was usually set and was always ignored by the runtime - it had been a transitional marker that did not get deleted after the transition. This bit is now paired with (1<<30), and represented as the pair (3<<29), for the following combinations of valid bit settings, and their meanings.
switch (flags & (3<<29)) {
case (0<<29): 10.6.ABI, no signature field available
case (1<<29): 10.6.ABI, no signature field available
case (2<<29): ABI.2010.3.16, regular calling convention, presence of signature field
case (3<<29): ABI.2010.3.16, stret calling convention, presence of signature field,
}
The signature field is not always populated.
The following discussions are presented as 10.6.ABI otherwise.
Block literals may occur within functions where the structure is created in stack local memory. They may also appear as initialization expressions for Block variables of global or static local variables.
When a Block literal expression is evaluated the stack based structure is initialized as follows:
1) static descriptor structure is declared and initialized as follows:
1a) the invoke function pointer is set to a function that takes the Block structure as its first argument and the rest of the arguments (if any) to the Block and executes the Block compound statement.
1b) the size field is set to the size of the following Block literal structure.
1c) the copy_helper and dispose_helper function pointers are set to respective helper functions if they are required by the Block literal
2) a stack (or global) Block literal data structure is created and initialized as follows:
2a) the isa field is set to the address of the external _NSConcreteStackBlock, which is a block of uninitialized memory supplied in libSystem, or _NSConcreteGlobalBlock if this is a static or file level block literal.
2b) The flags field is set to zero unless there are variables imported into the block that need helper functions for program level Block_copy() and Block_release() operations, in which case the (1<<25) flags bit is set.
As an example, the Block literal expression
^ { printf("hello world\n"); }
would cause to be created on a 32-bit system:
struct __block_literal_1 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_1 *);
struct __block_descriptor_1 *descriptor;
};
void __block_invoke_1(struct __block_literal_1 *_block) {
printf("hello world\n");
}
static struct __block_descriptor_1 {
unsigned long int reserved;
unsigned long int Block_size;
} __block_descriptor_1 = { 0, sizeof(struct __block_literal_1) };
and where the block literal appeared
struct __block_literal_1 _block_literal = {
&_NSConcreteStackBlock,
(1<<29), <uninitialized>,
__block_invoke_1,
&__block_descriptor_1
};
Blocks import other Block references, const copies of other variables, and variables marked __block. In Objective-C variables may additionally be objects.
When a Block literal expression is used as the initial value of a global or static local variable it is initialized as follows:
struct __block_literal_1 __block_literal_1 = {
&_NSConcreteGlobalBlock,
(1<<28)|(1<<29), <uninitialized>,
__block_invoke_1,
&__block_descriptor_1
};
that is, a different address is provided as the first value and a particular (1<<28) bit is set in the flags field, and otherwise it is the same as for stack based Block literals. This is an optimization that can be used for any Block literal that imports no const or __block storage variables.
2. Imported Variables
Variables of "auto" storage class are imported as const copies. Variables of "__block" storage class are imported as a pointer to an enclosing data structure. Global variables are simply referenced and not considered as imported.
2.1 Imported const copy variables
Automatic storage variables not marked with __block are imported as const copies.
The simplest example is that of importing a variable of type int.
int x = 10;
void (^vv)(void) = ^{ printf("x is %d\n", x); }
x = 11;
vv();
would be compiled
struct __block_literal_2 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_2 *);
struct __block_descriptor_2 *descriptor;
const int x;
};
void __block_invoke_2(struct __block_literal_2 *_block) {
printf("x is %d\n", _block->x);
}
static struct __block_descriptor_2 {
unsigned long int reserved;
unsigned long int Block_size;
} __block_descriptor_2 = { 0, sizeof(struct __block_literal_2) };
and
struct __block_literal_2 __block_literal_2 = {
&_NSConcreteStackBlock,
(1<<29), <uninitialized>,
__block_invoke_2,
&__block_descriptor_2,
x
};
In summary, scalars, structures, unions, and function pointers are generally imported as const copies with no need for helper functions.
2.2 Imported const copy of Block reference
The first case where copy and dispose helper functions are required is for the case of when a block itself is imported. In this case both a copy_helper function and a dispose_helper function are needed. The copy_helper function is passed both the existing stack based pointer and the pointer to the new heap version and should call back into the runtime to actually do the copy operation on the imported fields within the block. The runtime functions are all described in Section 5.0 Runtime Helper Functions.
An example:
void (^existingBlock)(void) = ...;
void (^vv)(void) = ^{ existingBlock(); }
vv();
struct __block_literal_3 {
...; // existing block
};
struct __block_literal_4 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_4 *);
struct __block_literal_3 *const existingBlock;
};
void __block_invoke_4(struct __block_literal_4 *_block) {
_block->existingBlock->invoke(_block->existingBlock);
}
void __block_copy_4(struct __block_literal_4 *dst, struct __block_literal_4 *src) {
//_Block_copy_assign(&dst->existingBlock, src->existingBlock, 0);
_Block_object_assign(&dst->existingBlock, src->existingBlock, BLOCK_FIELD_IS_BLOCK);
}
void __block_dispose_4(struct __block_literal_4 *src) {
// was _Block_destroy
_Block_object_dispose(src->existingBlock, BLOCK_FIELD_IS_BLOCK);
}
static struct __block_descriptor_4 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_4 *dst, struct __block_literal_4 *src);
void (*dispose_helper)(struct __block_literal_4 *);
} __block_descriptor_4 = {
0,
sizeof(struct __block_literal_4),
__block_copy_4,
__block_dispose_4,
};
and where it is used
struct __block_literal_4 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<29), <uninitialized>,
__block_invoke_4,
&__block_descriptor_4,
existingBlock,
};
2.2.1 Importing __attribute__((NSObject)) variables.
GCC introduces __attribute__((NSObject)) on structure pointers to mean "this is an object". This is useful because many low level data structures are declared as opaque structure pointers, e.g. CFStringRef, CFArrayRef, etc. When used from C, however, these are still really objects and are the second case that requires copy and dispose helper functions to be generated. The copy helper functions generated by the compiler should use the _Block_object_assign runtime helper function and in the dispose helper the _Block_object_dispose runtime helper function should be called.
For example, block xyzzy in the following
struct Opaque *__attribute__((NSObject)) objectPointer = ...;
...
void (^xyzzy)(void) = ^{ CFPrint(objectPointer); };
would have helper functions
void __block_copy_xyzzy(struct __block_literal_5 *dst, struct __block_literal_5 *src) {
_Block_object_assign(&dst->objectPointer, src-> objectPointer, BLOCK_FIELD_IS_OBJECT);
}
void __block_dispose_xyzzy(struct __block_literal_5 *src) {
_Block_object_dispose(src->objectPointer, BLOCK_FIELD_IS_OBJECT);
}
generated.
2.3 Imported __block marked variables.
2.3.1 Layout of __block marked variables
The compiler must embed variables that are marked __block in a specialized structure of the form:
struct _block_byref_xxxx {
void *isa;
struct _block_byref_xxxx *forwarding;
int flags; //refcount;
int size;
typeof(marked_variable) marked_variable;
};
Variables of certain types require helper functions for when Block_copy() and Block_release() are performed upon a referencing Block. At the "C" level only variables that are of type Block or ones that have __attribute__((NSObject)) marked require helper functions. In Objective-C objects require helper functions and in C++ stack based objects require helper functions. Variables that require helper functions use the form:
struct _block_byref_xxxx {
void *isa;
struct _block_byref_xxxx *forwarding;
int flags; //refcount;
int size;
// helper functions called via Block_copy() and Block_release()
void (*byref_keep)(void *dst, void *src);
void (*byref_dispose)(void *);
typeof(marked_variable) marked_variable;
};
The structure is initialized such that
a) the forwarding pointer is set to the beginning of its enclosing structure,
b) the size field is initialized to the total size of the enclosing structure,
c) the flags field is set to either 0 if no helper functions are needed or (1<<25) if they are,
d) the helper functions are initialized (if present)
e) the variable itself is set to its initial value.
f) the isa field is set to NULL
2.3.2 Access to __block variables from within its lexical scope.
In order to "move" the variable to the heap upon a copy_helper operation the compiler must rewrite access to such a variable to be indirect through the structure's forwarding pointer. For example:
int __block i = 10;
i = 11;
would be rewritten to be:
struct _block_byref_i {
void *isa;
struct _block_byref_i *forwarding;
int flags; //refcount;
int size;
int captured_i;
} i = { NULL, &i, 0, sizeof(struct _block_byref_i), 10 };
i.forwarding->captured_i = 11;
In the case of a Block reference variable being marked __block the helper code generated must use the _Block_object_assign and _Block_object_dispose routines supplied by the runtime to make the copies. For example:
__block void (^voidBlock)(void) = blockA;
voidBlock = blockB;
would translate into
struct _block_byref_voidBlock {
void *isa;
struct _block_byref_voidBlock *forwarding;
int flags; //refcount;
int size;
void (*byref_keep)(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src);
void (*byref_dispose)(struct _block_byref_voidBlock *);
void (^captured_voidBlock)(void);
};
void _block_byref_keep_helper(struct _block_byref_voidBlock *dst, struct _block_byref_voidBlock *src) {
//_Block_copy_assign(&dst->captured_voidBlock, src->captured_voidBlock, 0);
_Block_object_assign(&dst->captured_voidBlock, src->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER);
}
void _block_byref_dispose_helper(struct _block_byref_voidBlock *param) {
//_Block_destroy(param->captured_voidBlock, 0);
_Block_object_dispose(param->captured_voidBlock, BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER);
}
and
struct _block_byref_voidBlock voidBlock = {( .forwarding=&voidBlock, .flags=(1<<25), .size=sizeof(struct _block_byref_voidBlock),
.byref_keep=_block_byref_keep_helper, .byref_dispose=_block_byref_dispose_helper,
.captured_voidBlock=blockA )};
voidBlock.forwarding->captured_voidBlock = blockB;
2.3.3 Importing __block variables into Blocks
A Block that uses a __block variable in its compound statement body must import the variable and emit copy_helper and dispose_helper helper functions that, in turn, call back into the runtime to actually copy or release the byref data block using the functions _Block_object_assign and _Block_object_dispose.
For example:
int __block i = 2;
functioncall(^{ i = 10; });
would translate to
struct _block_byref_i {
void *isa; // set to NULL
struct _block_byref_i *forwarding;
int flags; //refcount;
int size;
void (*byref_keep)(struct _block_byref_i *dst, struct _block_byref_i *src);
void (*byref_dispose)(struct _block_byref_i *);
int captured_i;
};
struct __block_literal_5 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_5 *);
struct __block_descriptor_5 *descriptor;
struct _block_byref_i *i_holder;
};
void __block_invoke_5(struct __block_literal_5 *_block) {
_block->i_holder->forwarding->captured_i = 10;
}
void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) {
//_Block_byref_assign_copy(&dst->captured_i, src->captured_i);
_Block_object_assign(&dst->captured_i, src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER);
}
void __block_dispose_5(struct __block_literal_5 *src) {
//_Block_byref_release(src->captured_i);
_Block_object_dispose(src->captured_i, BLOCK_FIELD_IS_BYREF | BLOCK_BYREF_CALLER);
}
static struct __block_descriptor_5 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src);
void (*dispose_helper)(struct __block_literal_5 *);
} __block_descriptor_5 = { 0, sizeof(struct __block_literal_5), __block_copy_5, __block_dispose_5 };
and
struct _block_byref_i i = {( .forwarding=&i, .flags=0, .size=sizeof(struct _block_byref_i) )};
struct __block_literal_5 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<29), <uninitialized>,
__block_invoke_5,
&__block_descriptor_5,
2,
};
2.3.4 Importing __attribute__((NSObject)) __block variables
A __block variable that is also marked __attribute__((NSObject)) should have byref_keep and byref_dispose helper functions that use _Block_object_assign and _Block_object_dispose.
2.3.5 __block escapes
Because Blocks referencing __block variables may have Block_copy() performed upon them the underlying storage for the variables may move to the heap. In Objective-C Garbage Collection Only compilation environments the heap used is the garbage collected one and no further action is required. Otherwise the compiler must issue a call to potentially release any heap storage for __block variables at all escapes or terminations of their scope. The call should be:
_Block_object_dispose(&_block_byref_xxx, BLOCK_FIELD_IS_BYREF);
2.3.6 Nesting
Blocks may contain Block literal expressions. Any variables used within inner blocks are imported into all enclosing Block scopes even if the variables are not used. This includes const imports as well as __block variables.
3. Objective C Extensions to Blocks
3.1 Importing Objects
Objects should be treated as __attribute__((NSObject)) variables; all copy_helper, dispose_helper, byref_keep, and byref_dispose helper functions should use _Block_object_assign and _Block_object_dispose. There should be no code generated that uses -retain or -release methods.
3.2 Blocks as Objects
The compiler will treat Blocks as objects when synthesizing property setters and getters, will characterize them as objects when generating garbage collection strong and weak layout information in the same manner as objects, and will issue strong and weak write-barrier assignments in the same manner as objects.
3.3 __weak __block Support
Objective-C (and Objective-C++) support the __weak attribute on __block variables. Under normal circumstances the compiler uses the Objective-C runtime helper support functions objc_assign_weak and objc_read_weak. Both should continue to be used for all reads and writes of __weak __block variables:
objc_read_weak(&block->byref_i->forwarding->i)
The __weak variable is stored in a _block_byref_xxxx structure and the Block has copy and dispose helpers for this structure that call:
_Block_object_assign(&dest->_block_byref_i, src-> _block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF);
and
_Block_object_dispose(src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BYREF);
In turn, the block_byref copy support helpers distinguish between whether the __block variable is a Block or not and should either call:
_Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_OBJECT | BLOCK_BYREF_CALLER);
for something declared as an object or
_Block_object_assign(&dest->_block_byref_i, src->_block_byref_i, BLOCK_FIELD_IS_WEAK | BLOCK_FIELD_IS_BLOCK | BLOCK_BYREF_CALLER);
for something declared as a Block.
A full example follows:
__block __weak id obj = <initialization expression>;
functioncall(^{ [obj somemessage]; });
would translate to
struct _block_byref_obj {
void *isa; // uninitialized
struct _block_byref_obj *forwarding;
int flags; //refcount;
int size;
void (*byref_keep)(struct _block_byref_obj *dst, struct _block_byref_obj *src);
void (*byref_dispose)(struct _block_byref_obj *);
id captured_obj;
};
void _block_byref_obj_keep(struct _block_byref_obj *dst, struct _block_byref_obj *src) {
//_Block_copy_assign(&dst->captured_obj, src->captured_obj, 0);
_Block_object_assign(&dst->captured_obj, src->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER);
}
void _block_byref_obj_dispose(struct _block_byref_obj *param) {
//_Block_destroy(param->captured_obj, 0);
_Block_object_dispose(param->captured_obj, BLOCK_FIELD_IS_OBJECT | BLOCK_FIELD_IS_WEAK | BLOCK_BYREF_CALLER);
}
for the block byref part and
struct __block_literal_5 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_5 *);
struct __block_descriptor_5 *descriptor;
struct _block_byref_obj *byref_obj;
};
void __block_invoke_5(struct __block_literal_5 *_block) {
[objc_read_weak(&_block->byref_obj->forwarding->captured_obj) somemessage];
}
void __block_copy_5(struct __block_literal_5 *dst, struct __block_literal_5 *src) {
//_Block_byref_assign_copy(&dst->byref_obj, src->byref_obj);
_Block_object_assign(&dst->byref_obj, src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK);
}
void __block_dispose_5(struct __block_literal_5 *src) {
//_Block_byref_release(src->byref_obj);
_Block_object_dispose(src->byref_obj, BLOCK_FIELD_IS_BYREF | BLOCK_FIELD_IS_WEAK);
}
static struct __block_descriptor_5 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_5 *dst, struct __block_literal_5 *src);
void (*dispose_helper)(struct __block_literal_5 *);
} __block_descriptor_5 = { 0, sizeof(struct __block_literal_5), __block_copy_5, __block_dispose_5 };
and within the compound statement:
struct _block_byref_obj obj = {( .forwarding=&obj, .flags=(1<<25), .size=sizeof(struct _block_byref_obj),
.byref_keep=_block_byref_obj_keep, .byref_dispose=_block_byref_obj_dispose,
.captured_obj = <initialization expression> )};
struct __block_literal_5 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<29), <uninitialized>,
__block_invoke_5,
&__block_descriptor_5,
&obj, // a reference to the on-stack structure containing "captured_obj"
};
functioncall(_block_literal->invoke(&_block_literal));
4.0 C++ Support
Within a block stack based C++ objects are copied into const copies using the copy constructor. It is an error if a stack based C++ object is used within a block if it does not have a copy constructor. In addition both copy and destroy helper routines must be synthesized for the block to support the Block_copy() operation, and the flags word is marked with the (1<<26) bit in addition to the (1<<25) bit. The copy helper should call the constructor using appropriate offsets of the variable within the supplied stack based block source and heap based destination for all const constructed copies, and similarly should call the destructor in the destroy routine.
As an example, suppose a C++ class FOO existed with a copy constructor. Within a code block a stack version of a FOO object is declared and used within a Block literal expression:
{
FOO foo;
void (^block)(void) = ^{ printf("%d\n", foo.value()); };
}
The compiler would synthesize
struct __block_literal_10 {
void *isa;
int flags;
int reserved;
void (*invoke)(struct __block_literal_10 *);
struct __block_descriptor_10 *descriptor;
const FOO foo;
};
void __block_invoke_10(struct __block_literal_10 *_block) {
printf("%d\n", _block->foo.value());
}
void __block_copy_10(struct __block_literal_10 *dst, struct __block_literal_10 *src) {
FOO_ctor(&dst->foo, &src->foo);
}
void __block_dispose_10(struct __block_literal_10 *src) {
FOO_dtor(&src->foo);
}
static struct __block_descriptor_10 {
unsigned long int reserved;
unsigned long int Block_size;
void (*copy_helper)(struct __block_literal_10 *dst, struct __block_literal_10 *src);
void (*dispose_helper)(struct __block_literal_10 *);
} __block_descriptor_10 = { 0, sizeof(struct __block_literal_10), __block_copy_10, __block_dispose_10 };
and the code would be:
{
FOO foo;
comp_ctor(&foo); // default constructor
struct __block_literal_10 _block_literal = {
&_NSConcreteStackBlock,
(1<<25)|(1<<26)|(1<<29), <uninitialized>,
__block_invoke_10,
&__block_descriptor_10,
};
comp_ctor(&_block_literal.foo, &foo); // const copy into stack version
struct __block_literal_10 *block = &_block_literal; // assign literal to block variable
block->invoke(block); // invoke block
comp_dtor(&_block_literal.foo); // destroy stack version of const block copy
comp_dtor(&foo); // destroy original version
}
C++ objects stored in __block storage start out on the stack in a block_byref data structure as do other variables. Such objects (if not const objects) must support a regular copy constructor. The block_byref data structure will have copy and destroy helper routines synthesized by the compiler. The copy helper will have code created to perform the copy constructor based on the initial stack block_byref data structure, and will also set the (1<<26) bit in addition to the (1<<25) bit. The destroy helper will have code to do the destructor on the object stored within the supplied block_byref heap data structure. For example,
__block FOO blockStorageFoo;
requires the normal constructor for the embedded blockStorageFoo object
FOO_ctor(& _block_byref_blockStorageFoo->blockStorageFoo);
and at scope termination the destructor:
FOO_dtor(& _block_byref_blockStorageFoo->blockStorageFoo);
Note that the forwarding indirection is NOT used.
The compiler would need to generate (if used from a block literal) the following copy/dispose helpers:
void _block_byref_obj_keep(struct _block_byref_blockStorageFoo *dst, struct _block_byref_blockStorageFoo *src) {
FOO_ctor(&dst->blockStorageFoo, &src->blockStorageFoo);
}
void _block_byref_obj_dispose(struct _block_byref_blockStorageFoo *src) {
FOO_dtor(&src->blockStorageFoo);
}
for the appropriately named constructor and destructor for the class/struct FOO.
To support member variable and function access the compiler will synthesize a const pointer to a block version of the "this" pointer.
5.0 Runtime Helper Functions
The runtime helper functions are described in /usr/local/include/Block_private.h. To summarize their use, a block requires copy/dispose helpers if it imports any block variables, __block storage variables, __attribute__((NSObject)) variables, or C++ const copied objects with constructor/destructors. The (1<<26) bit is set and functions are generated.
The block copy helper function should, for each of the variables of the type mentioned above, call
_Block_object_assign(&dst->target, src->target, BLOCK_FIELD_<appropo>);
in the copy helper and
_Block_object_dispose(src->target, BLOCK_FIELD_<appropo>);
in the dispose helper where
<appropo> is
enum {
BLOCK_FIELD_IS_OBJECT = 3, // id, NSObject, __attribute__((NSObject)), block, ...
BLOCK_FIELD_IS_BLOCK = 7, // a block variable
BLOCK_FIELD_IS_BYREF = 8, // the on stack structure holding the __block variable
BLOCK_FIELD_IS_WEAK = 16, // declared __weak
BLOCK_BYREF_CALLER = 128, // called from byref copy/dispose helpers
};
and of course the CTORs/DTORs for const copied C++ objects.
The block_byref data structure similarly requires copy/dispose helpers for block variables, __attribute__((NSObject)) variables, or C++ const copied objects with constructor/destructors, and again the (1<<26) bit is set and functions are generated in the same manner.
Under ObjC we allow __weak as an attribute on __block variables, and this causes the addition of BLOCK_FIELD_IS_WEAK ORed onto the BLOCK_FIELD_IS_BYREF flag when copying the block_byref structure in the block copy helper, and onto the BLOCK_FIELD_<appropo> field within the block_byref copy/dispose helper calls.
The prototypes, and summary, of the helper functions are
/* Certain field types require runtime assistance when being copied to the heap. The following function is used
to copy fields of types: blocks, pointers to byref structures, and objects (including __attribute__((NSObject)) pointers).
BLOCK_FIELD_IS_WEAK is orthogonal to the other choices which are mutually exclusive.
Only in a Block copy helper will one see BLOCK_FIELD_IS_BYREF.
*/
void _Block_object_assign(void *destAddr, const void *object, const int flags);
/* Similarly a compiler generated dispose helper needs to call back for each field of the byref data structure.
(Currently the implementation only packs one field into the byref structure but in principle there could be more).
The same flags used in the copy helper should be used for each call generated to this function:
*/
void _Block_object_dispose(const void *object, const int flags);
*NOTE* This document has moved to http://clang.llvm.org/docs/Block-ABI-Apple.html.

361
docs/BlockLanguageSpec.rst Normal file
View File

@ -0,0 +1,361 @@
.. role:: block-term
=================================
Language Specification for Blocks
=================================
.. contents::
:local:
Revisions
=========
- 2008/2/25 --- created
- 2008/7/28 --- revised, ``__block`` syntax
- 2008/8/13 --- revised, Block globals
- 2008/8/21 --- revised, C++ elaboration
- 2008/11/1 --- revised, ``__weak`` support
- 2009/1/12 --- revised, explicit return types
- 2009/2/10 --- revised, ``__block`` objects need retain
Overview
========
A new derived type is introduced to C and, by extension, Objective-C,
C++, and Objective-C++.
The Block Type
==============
Like function types, the :block-term:`Block type` is a pair consisting
of a result value type and a list of parameter types very similar to a
function type. Blocks are intended to be used much like functions with
the key distinction being that in addition to executable code they
also contain various variable bindings to automatic (stack) or managed
(heap) memory.
The abstract declarator,
.. code-block:: c
int (^)(char, float)
describes a reference to a Block that, when invoked, takes two
parameters, the first of type char and the second of type float, and
returns a value of type int. The Block referenced is of opaque data
that may reside in automatic (stack) memory, global memory, or heap
memory.
Block Variable Declarations
===========================
A :block-term:`variable with Block type` is declared using function
pointer style notation substituting ``^`` for ``*``. The following are
valid Block variable declarations:
.. code-block:: c
void (^blockReturningVoidWithVoidArgument)(void);
int (^blockReturningIntWithIntAndCharArguments)(int, char);
void (^arrayOfTenBlocksReturningVoidWithIntArgument[10])(int);
Variadic ``...`` arguments are supported. [variadic.c] A Block that
takes no arguments must specify void in the argument list [voidarg.c].
An empty parameter list does not represent, as K&R provide, an
unspecified argument list. Note: both gcc and clang support K&R style
as a convenience.
A Block reference may be cast to a pointer of arbitrary type and vice
versa. [cast.c] A Block reference may not be dereferenced via the
pointer dereference operator ``*``, and thus a Block's size may not be
computed at compile time. [sizeof.c]
Block Literal Expressions
=========================
A :block-term:`Block literal expression` produces a reference to a
Block. It is introduced by the use of the ``^`` token as a unary
operator.
.. code-block:: c
Block_literal_expression ::= ^ block_decl compound_statement_body
block_decl ::=
block_decl ::= parameter_list
block_decl ::= type_expression
where type expression is extended to allow ``^`` as a Block reference
(pointer) where ``*`` is allowed as a function reference (pointer).
The following Block literal:
.. code-block:: c
^ void (void) { printf("hello world\n"); }
produces a reference to a Block with no arguments with no return value.
The return type is optional and is inferred from the return
statements. If the return statements return a value, they all must
return a value of the same type. If there is no value returned the
inferred type of the Block is void; otherwise it is the type of the
return statement value.
If the return type is omitted and the argument list is ``( void )``,
the ``( void )`` argument list may also be omitted.
So:
.. code-block:: c
^ ( void ) { printf("hello world\n"); }
and:
.. code-block:: c
^ { printf("hello world\n"); }
are exactly equivalent constructs for the same expression.
The type_expression extends C expression parsing to accommodate Block
reference declarations as it accommodates function pointer
declarations.
Given:
.. code-block:: c
typedef int (*pointerToFunctionThatReturnsIntWithCharArg)(char);
pointerToFunctionThatReturnsIntWithCharArg functionPointer;
^ pointerToFunctionThatReturnsIntWithCharArg (float x) { return functionPointer; }
and:
.. code-block:: c
^ int ((*)(float x))(char) { return functionPointer; }
are equivalent expressions, as is:
.. code-block:: c
^(float x) { return functionPointer; }
[returnfunctionptr.c]
The compound statement body establishes a new lexical scope within
that of its parent. Variables used within the scope of the compound
statement are bound to the Block in the normal manner with the
exception of those in automatic (stack) storage. Thus one may access
functions and global variables as one would expect, as well as static
local variables. [testme]
Local automatic (stack) variables referenced within the compound
statement of a Block are imported and captured by the Block as const
copies. The capture (binding) is performed at the time of the Block
literal expression evaluation.
The compiler is not required to capture a variable if it can prove
that no references to the variable will actually be evaluated.
Programmers can force a variable to be captured by referencing it in a
statement at the beginning of the Block, like so:
.. code-block:: c
(void) foo;
This matters when capturing the variable has side-effects, as it can
in Objective-C or C++.
The lifetime of variables declared in a Block is that of a function;
each activation frame contains a new copy of variables declared within
the local scope of the Block. Such variable declarations should be
allowed anywhere [testme] rather than only when C99 parsing is
requested, including for statements. [testme]
Block literal expressions may occur within Block literal expressions
(nest) and all variables captured by any nested blocks are implicitly
also captured in the scopes of their enclosing Blocks.
A Block literal expression may be used as the initialization value for
Block variables at global or local static scope.
The Invoke Operator
===================
Blocks are :block-term:`invoked` using function call syntax with a
list of expression parameters of types corresponding to the
declaration and returning a result type also according to the
declaration. Given:
.. code-block:: c
int (^x)(char);
void (^z)(void);
int (^(*y))(char) = &x;
the following are all legal Block invocations:
.. code-block:: c
x('a');
(*y)('a');
(true ? x : *y)('a')
The Copy and Release Operations
===============================
The compiler and runtime provide :block-term:`copy` and
:block-term:`release` operations for Block references that create and,
in matched use, release allocated storage for referenced Blocks.
The copy operation ``Block_copy()`` is styled as a function that takes
an arbitrary Block reference and returns a Block reference of the same
type. The release operation, ``Block_release()``, is styled as a
function that takes an arbitrary Block reference and, if dynamically
matched to a Block copy operation, allows recovery of the referenced
allocated memory.
The ``__block`` Storage Qualifier
=================================
In addition to the new Block type we also introduce a new storage
qualifier, :block-term:`__block`, for local variables. [testme: a
__block declaration within a block literal] The ``__block`` storage
qualifier is mutually exclusive with the existing local storage
qualifiers auto, register, and static. [testme] Variables qualified by
``__block`` act as if they were in allocated storage and this storage
is automatically recovered after last use of said variable. An
implementation may choose an optimization where the storage is
initially automatic and only "moved" to allocated (heap) storage upon
a Block_copy of a referencing Block. Such variables may be mutated as
normal variables are.
In the case where a ``__block`` variable is a Block one must assume
that the ``__block`` variable resides in allocated storage and as such
is assumed to reference a Block that is also in allocated storage
(that it is the result of a ``Block_copy`` operation). Despite this
there is no provision to do a ``Block_copy`` or a ``Block_release`` if
an implementation provides initial automatic storage for Blocks. This
is due to the inherent race condition of potentially several threads
trying to update the shared variable and the need for synchronization
around disposing of older values and copying new ones. Such
synchronization is beyond the scope of this language specification.
Control Flow
============
The compound statement of a Block is treated much like a function body
with respect to control flow in that goto, break, and continue do not
escape the Block. Exceptions are treated *normally* in that when
thrown they pop stack frames until a catch clause is found.
Objective-C Extensions
======================
Objective-C extends the definition of a Block reference type to be
that also of id. A variable or expression of Block type may be
messaged or used as a parameter wherever an id may be. The converse is
also true. Block references may thus appear as properties and are
subject to the assign, retain, and copy attribute logic that is
reserved for objects.
All Blocks are constructed to be Objective-C objects regardless of
whether the Objective-C runtime is operational in the program or
not. Blocks using automatic (stack) memory are objects and may be
messaged, although they may not be assigned into ``__weak`` locations
if garbage collection is enabled.
Within a Block literal expression within a method definition
references to instance variables are also imported into the lexical
scope of the compound statement. These variables are implicitly
qualified as references from self, and so self is imported as a const
copy. The net effect is that instance variables can be mutated.
The :block-term:`Block_copy` operator retains all objects held in
variables of automatic storage referenced within the Block expression
(or forms strong references if running under garbage collection).
Object variables of ``__block`` storage type are assumed to hold
normal pointers with no provision for retain and release messages.
Foundation defines (and supplies) ``-copy`` and ``-release`` methods for
Blocks.
In the Objective-C and Objective-C++ languages, we allow the
``__weak`` specifier for ``__block`` variables of object type. If
garbage collection is not enabled, this qualifier causes these
variables to be kept without retain messages being sent. This
knowingly leads to dangling pointers if the Block (or a copy) outlives
the lifetime of this object.
In garbage collected environments, the ``__weak`` variable is set to
nil when the object it references is collected, as long as the
``__block`` variable resides in the heap (either by default or via
``Block_copy()``). The initial Apple implementation does in fact
start ``__block`` variables on the stack and migrate them to the heap
only as a result of a ``Block_copy()`` operation.
It is a runtime error to attempt to assign a reference to a
stack-based Block into any storage marked ``__weak``, including
``__weak`` ``__block`` variables.
C++ Extensions
==============
Block literal expressions within functions are extended to allow const
use of C++ objects, pointers, or references held in automatic storage.
As usual, within the block, references to captured variables become
const-qualified, as if they were references to members of a const
object. Note that this does not change the type of a variable of
reference type.
For example, given a class Foo:
.. code-block:: c
Foo foo;
Foo &fooRef = foo;
Foo *fooPtr = &foo;
A Block that referenced these variables would import the variables as
const variations:
.. code-block:: c
const Foo block_foo = foo;
Foo &block_fooRef = fooRef;
Foo *const block_fooPtr = fooPtr;
Captured variables are copied into the Block at the instant of
evaluating the Block literal expression. They are also copied when
calling ``Block_copy()`` on a Block allocated on the stack. In both
cases, they are copied as if the variable were const-qualified, and
it's an error if there's no such constructor.
Captured variables in Blocks on the stack are destroyed when control
leaves the compound statement that contains the Block literal
expression. Captured variables in Blocks on the heap are destroyed
when the reference count of the Block drops to zero.
Variables declared as residing in ``__block`` storage may be initially
allocated in the heap or may first appear on the stack and be copied
to the heap as a result of a ``Block_copy()`` operation. When copied
from the stack, ``__block`` variables are copied using their normal
qualification (i.e. without adding const). In C++11, ``__block``
variables are copied as x-values if that is possible, then as l-values
if not; if both fail, it's an error. The destructor for any initial
stack-based version is called at the variable's normal end of scope.
References to ``this``, as well as references to non-static members of
any enclosing class, are evaluated by capturing ``this`` just like a
normal variable of C pointer type.
Member variables that are Blocks may not be overloaded by the types of
their arguments.

View File

@ -1,171 +0,0 @@
Language Specification for Blocks
2008/2/25 — created
2008/7/28 — revised, __block syntax
2008/8/13 — revised, Block globals
2008/8/21 — revised, C++ elaboration
2008/11/1 — revised, __weak support
2009/1/12 — revised, explicit return types
2009/2/10 — revised, __block objects need retain
Copyright 2008-2009 Apple, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
The Block Type
A new derived type is introduced to C and, by extension, Objective-C, C++, and Objective-C++. Like function types, the Block type is a pair consisting of a result value type and a list of parameter types very similar to a function type. Blocks are intended to be used much like functions with the key distinction being that in addition to executable code they also contain various variable bindings to automatic (stack) or managed (heap) memory.
The abstract declarator int (^)(char, float) describes a reference to a Block that, when invoked, takes two parameters, the first of type char and the second of type float, and returns a value of type int. The Block referenced is of opaque data that may reside in automatic (stack) memory, global memory, or heap memory.
Block Variable Declarations
A variable with Block type is declared using function pointer style notation substituting ^ for *. The following are valid Block variable declarations:
void (^blockReturningVoidWithVoidArgument)(void);
int (^blockReturningIntWithIntAndCharArguments)(int, char);
void (^arrayOfTenBlocksReturningVoidWithIntArgument[10])(int);
Variadic ... arguments are supported. [variadic.c] A Block that takes no arguments must specify void in the argument list [voidarg.c]. An empty parameter list does not represent, as K&R provide, an unspecified argument list. Note: both gcc and clang support K&R style as a convenience.
A Block reference may be cast to a pointer of arbitrary type and vice versa. [cast.c] A Block reference may not be dereferenced via the pointer dereference operator *, and thus a Block's size may not be computed at compile time. [sizeof.c]
Block Literal Expressions
A Block literal expression produces a reference to a Block. It is introduced by the use of the ^ token as a unary operator.
Block_literal_expression ::= ^ block_decl compound_statement_body
block_decl ::=
block_decl ::= parameter_list
block_decl ::= type_expression
...where type expression is extended to allow ^ as a Block reference (pointer) where * is allowed as a function reference (pointer).
The following Block literal:
^ void (void) { printf("hello world\n"); }
...produces a reference to a Block with no arguments with no return value.
The return type is optional and is inferred from the return statements. If the return statements return a value, they all must return a value of the same type. If there is no value returned the inferred type of the Block is void; otherwise it is the type of the return statement value.
If the return type is omitted and the argument list is ( void ), the ( void ) argument list may also be omitted.
So:
^ ( void ) { printf("hello world\n"); }
...and:
^ { printf("hello world\n"); }
...are exactly equivalent constructs for the same expression.
The type_expression extends C expression parsing to accommodate Block reference declarations as it accommodates function pointer declarations.
Given:
typedef int (*pointerToFunctionThatReturnsIntWithCharArg)(char);
pointerToFunctionThatReturnsIntWithCharArg functionPointer;
^ pointerToFunctionThatReturnsIntWithCharArg (float x) { return functionPointer; }
...and:
^ int ((*)(float x))(char) { return functionPointer; }
...are equivalent expressions, as is:
^(float x) { return functionPointer; }
[returnfunctionptr.c]
The compound statement body establishes a new lexical scope within that of its parent. Variables used within the scope of the compound statement are bound to the Block in the normal manner with the exception of those in automatic (stack) storage. Thus one may access functions and global variables as one would expect, as well as static local variables. [testme]
Local automatic (stack) variables referenced within the compound statement of a Block are imported and captured by the Block as const copies. The capture (binding) is performed at the time of the Block literal expression evaluation.
The compiler is not required to capture a variable if it can prove that no references to the variable will actually be evaluated. Programmers can force a variable to be captured by referencing it in a statement at the beginning of the Block, like so:
(void) foo;
This matters when capturing the variable has side-effects, as it can in Objective-C or C++.
The lifetime of variables declared in a Block is that of a function; each activation frame contains a new copy of variables declared within the local scope of the Block. Such variable declarations should be allowed anywhere [testme] rather than only when C99 parsing is requested, including for statements. [testme]
Block literal expressions may occur within Block literal expressions (nest) and all variables captured by any nested blocks are implicitly also captured in the scopes of their enclosing Blocks.
A Block literal expression may be used as the initialization value for Block variables at global or local static scope.
The Invoke Operator
Blocks are invoked using function call syntax with a list of expression parameters of types corresponding to the declaration and returning a result type also according to the declaration. Given:
int (^x)(char);
void (^z)(void);
int (^(*y))(char) = &x;
...the following are all legal Block invocations:
x('a');
(*y)('a');
(true ? x : *y)('a')
The Copy and Release Operations
The compiler and runtime provide copy and release operations for Block references that create and, in matched use, release allocated storage for referenced Blocks.
The copy operation Block_copy() is styled as a function that takes an arbitrary Block reference and returns a Block reference of the same type. The release operation, Block_release(), is styled as a function that takes an arbitrary Block reference and, if dynamically matched to a Block copy operation, allows recovery of the referenced allocated memory.
The __block Storage Qualifier
In addition to the new Block type we also introduce a new storage qualifier, __block, for local variables. [testme: a __block declaration within a block literal] The __block storage qualifier is mutually exclusive to the existing local storage qualifiers auto, register, and static.[testme] Variables qualified by __block act as if they were in allocated storage and this storage is automatically recovered after last use of said variable. An implementation may choose an optimization where the storage is initially automatic and only "moved" to allocated (heap) storage upon a Block_copy of a referencing Block. Such variables may be mutated as normal variables are.
In the case where a __block variable is a Block one must assume that the __block variable resides in allocated storage and as such is assumed to reference a Block that is also in allocated storage (that it is the result of a Block_copy operation). Despite this there is no provision to do a Block_copy or a Block_release if an implementation provides initial automatic storage for Blocks. This is due to the inherent race condition of potentially several threads trying to update the shared variable and the need for synchronization around disposing of older values and copying new ones. Such synchronization is beyond the scope of this language specification.
Control Flow
The compound statement of a Block is treated much like a function body with respect to control flow in that goto, break, and continue do not escape the Block. Exceptions are treated "normally" in that when thrown they pop stack frames until a catch clause is found.
Objective-C Extensions
Objective-C extends the definition of a Block reference type to be that also of id. A variable or expression of Block type may be messaged or used as a parameter wherever an id may be. The converse is also true. Block references may thus appear as properties and are subject to the assign, retain, and copy attribute logic that is reserved for objects.
All Blocks are constructed to be Objective-C objects regardless of whether the Objective-C runtime is operational in the program or not. Blocks using automatic (stack) memory are objects and may be messaged, although they may not be assigned into __weak locations if garbage collection is enabled.
Within a Block literal expression within a method definition references to instance variables are also imported into the lexical scope of the compound statement. These variables are implicitly qualified as references from self, and so self is imported as a const copy. The net effect is that instance variables can be mutated.
The Block_copy operator retains all objects held in variables of automatic storage referenced within the Block expression (or form strong references if running under garbage collection). Object variables of __block storage type are assumed to hold normal pointers with no provision for retain and release messages.
Foundation defines (and supplies) -copy and -release methods for Blocks.
In the Objective-C and Objective-C++ languages, we allow the __weak specifier for __block variables of object type. If garbage collection is not enabled, this qualifier causes these variables to be kept without retain messages being sent. This knowingly leads to dangling pointers if the Block (or a copy) outlives the lifetime of this object.
In garbage collected environments, the __weak variable is set to nil when the object it references is collected, as long as the __block variable resides in the heap (either by default or via Block_copy()). The initial Apple implementation does in fact start __block variables on the stack and migrate them to the heap only as a result of a Block_copy() operation.
It is a runtime error to attempt to assign a reference to a stack-based Block into any storage marked __weak, including __weak __block variables.
C++ Extensions
Block literal expressions within functions are extended to allow const use of C++ objects, pointers, or references held in automatic storage.
As usual, within the block, references to captured variables become const-qualified, as if they were references to members of a const object. Note that this does not change the type of a variable of reference type.
For example, given a class Foo:
Foo foo;
Foo &fooRef = foo;
Foo *fooPtr = &foo;
A Block that referenced these variables would import the variables as const variations:
const Foo block_foo = foo;
Foo &block_fooRef = fooRef;
Foo *const block_fooPtr = fooPtr;
Captured variables are copied into the Block at the instant of evaluating the Block literal expression. They are also copied when calling Block_copy() on a Block allocated on the stack. In both cases, they are copied as if the variable were const-qualified, and it's an error if there's no such constructor.
Captured variables in Blocks on the stack are destroyed when control leaves the compound statement that contains the Block literal expression. Captured variables in Blocks on the heap are destroyed when the reference count of the Block drops to zero.
Variables declared as residing in __block storage may be initially allocated in the heap or may first appear on the stack and be copied to the heap as a result of a Block_copy() operation. When copied from the stack, __block variables are copied using their normal qualification (i.e. without adding const). In C++11, __block variables are copied as x-values if that is possible, then as l-values if not; if both fail, it's an error. The destructor for any initial stack-based version is called at the variable's normal end of scope.
References to 'this', as well as references to non-static members of any enclosing class, are evaluated by capturing 'this' just like a normal variable of C pointer type.
Member variables that are Blocks may not be overloaded by the types of their arguments.

36
docs/ClangCheck.rst Normal file
View File

@ -0,0 +1,36 @@
==========
ClangCheck
==========
`ClangCheck` is a small wrapper around :doc:`LibTooling` which can be used to
do basic error checking and AST dumping.
.. code-block:: console
$ cat <<EOF > snippet.cc
> void f() {
> int a = 0
> }
> EOF
$ ~/clang/build/bin/clang-check snippet.cc -ast-dump --
Processing: /Users/danieljasper/clang/llvm/tools/clang/docs/snippet.cc.
/Users/danieljasper/clang/llvm/tools/clang/docs/snippet.cc:2:12: error: expected ';' at end of
declaration
int a = 0
^
;
(TranslationUnitDecl 0x7ff3a3029ed0 <<invalid sloc>>
(TypedefDecl 0x7ff3a302a410 <<invalid sloc>> __int128_t '__int128')
(TypedefDecl 0x7ff3a302a470 <<invalid sloc>> __uint128_t 'unsigned __int128')
(TypedefDecl 0x7ff3a302a830 <<invalid sloc>> __builtin_va_list '__va_list_tag [1]')
(FunctionDecl 0x7ff3a302a8d0 </Users/danieljasper/clang/llvm/tools/clang/docs/snippet.cc:1:1, line:3:1> f 'void (void)'
(CompoundStmt 0x7ff3a302aa10 <line:1:10, line:3:1>
(DeclStmt 0x7ff3a302a9f8 <line:2:3, line:3:1>
(VarDecl 0x7ff3a302a980 <line:2:3, col:11> a 'int'
(IntegerLiteral 0x7ff3a302a9d8 <col:11> 'int' 0))))))
1 error generated.
Error while processing snippet.cc.
The '--' at the end is important as it prevents `clang-check` from searching for a
compilation database. For more information on how to set up and use `clang-check`
in a project, see :doc:`HowToSetupToolingForLLVM`.

93
docs/ClangFormat.rst Normal file
View File

@ -0,0 +1,93 @@
===========
ClangFormat
===========
`ClangFormat` describes a set of tools that are built on top of
:doc:`LibFormat`. It can support your workflow in a variety of ways including a
standalone tool and editor integrations.
Standalone Tool
===============
:program:`clang-format` is located in `clang/tools/clang-format` and can be used
to format C/C++/Obj-C code.
.. code-block:: console
$ clang-format --help
OVERVIEW: A tool to format C/C++/Obj-C code.
Currently supports LLVM and Google style guides.
If no arguments are specified, it formats the code from standard input
and writes the result to the standard output.
If <file> is given, it reformats the file. If -i is specified together
with <file>, the file is edited in-place. Otherwise, the result is
written to the standard output.
USAGE: clang-format [options] [<file>]
OPTIONS:
-fatal-assembler-warnings - Consider warnings as error
-help - Display available options (-help-hidden for more)
-i - Inplace edit <file>, if specified.
-length=<int> - Format a range of this length, -1 for end of file.
-offset=<int> - Format a range starting at this file offset.
-stats - Enable statistics output from program
-style=<string> - Coding style, currently supports: LLVM, Google, Chromium.
-version - Display the version of this program
Vim Integration
===============
There is an integration for :program:`vim` which lets you run the
:program:`clang-format` standalone tool on your current buffer, optionally
selecting regions to reformat. The integration has the form of a `python`-file
which can be found under `clang/tools/clang-format/clang-format.py`.
This can be integrated by adding the following to your `.vimrc`:
.. code-block:: vim
map <C-K> :pyf <path-to-this-file>/clang-format.py<CR>
imap <C-K> <ESC>:pyf <path-to-this-file>/clang-format.py<CR>i
The first line enables :program:`clang-format` for NORMAL and VISUAL mode, the
second line adds support for INSERT mode. Change "C-K" to another binding if
you need :program:`clang-format` on a different key (C-K stands for Ctrl+k).
With this integration you can press the bound key and clang-format will
format the current line in NORMAL and INSERT mode or the selected region in
VISUAL mode. The line or region is extended to the next bigger syntactic
entity.
It operates on the current, potentially unsaved buffer and does not create
or save any files. To revert a formatting change, just undo.
Script for patch reformatting
=============================
The python script `clang/tools/clang-format/clang-format-diff.py` parses the output of
a unified diff and reformats all contained lines with :program:`clang-format`.
.. code-block:: console
usage: clang-format-diff.py [-h] [-p P] [-style STYLE]
Reformat changed lines in diff
optional arguments:
-h, --help show this help message and exit
-p P strip the smallest prefix containing P slashes
-style STYLE formatting style to apply (LLVM, Google)
So to reformat all the lines in the latest :program:`git` commit, just do:
.. code-block:: console
git diff -U0 HEAD^ | clang-format-diff.py
The :option:`-U0` will create a diff without context lines (otherwise the script
would format those as well).

View File

@ -1,170 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Clang Plugins</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Clang Plugins</h1>
<p>Clang Plugins make it possible to run extra user defined actions during
a compilation. This document will provide a basic walkthrough of how to write
and run a Clang Plugin.</p>
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
<p>Clang Plugins run FrontendActions over code. See the
<a href="RAVFrontendAction.html">FrontendAction tutorial</a> on how to write a
FrontendAction using the RecursiveASTVisitor. In this tutorial, we'll
demonstrate how to write a simple clang plugin.
</p>
<!-- ======================================================================= -->
<h2 id="pluginactions">Writing a PluginASTAction</h2>
<!-- ======================================================================= -->
<p>The main difference from writing normal FrontendActions is that you can
handle plugin command line options. The
PluginASTAction base class declares a ParseArgs method which you have to
implement in your plugin.
</p>
<pre>
bool ParseArgs(const CompilerInstance &amp;CI,
const std::vector&lt;std::string>&amp; args) {
for (unsigned i = 0, e = args.size(); i != e; ++i) {
if (args[i] == "-some-arg") {
// Handle the command line argument.
}
}
return true;
}
</pre>
<!-- ======================================================================= -->
<h2 id="registerplugin">Registering a plugin</h2>
<!-- ======================================================================= -->
<p>A plugin is loaded from a dynamic library at runtime by the compiler. To register
a plugin in a library, use FrontendPluginRegistry::Add:</p>
<pre>
static FrontendPluginRegistry::Add&lt;MyPlugin> X("my-plugin-name", "my plugin description");
</pre>
<!-- ======================================================================= -->
<h2 id="example">Putting it all together</h2>
<!-- ======================================================================= -->
<p>Let's look at an example plugin that prints top-level function names.
This example is also checked into the clang repository; please also take a look
at the latest <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/PrintFunctionNames.cpp?view=markup">checked in version of PrintFunctionNames.cpp</a>.</p>
<pre>
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/AST.h"
#include "clang/Frontend/CompilerInstance.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
namespace {
class PrintFunctionsConsumer : public ASTConsumer {
public:
virtual bool HandleTopLevelDecl(DeclGroupRef DG) {
for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) {
const Decl *D = *i;
if (const NamedDecl *ND = dyn_cast&lt;NamedDecl>(D))
llvm::errs() &lt;&lt; "top-level-decl: \"" &lt;&lt; ND->getNameAsString() &lt;&lt; "\"\n";
}
return true;
}
};
class PrintFunctionNamesAction : public PluginASTAction {
protected:
ASTConsumer *CreateASTConsumer(CompilerInstance &amp;CI, llvm::StringRef) {
return new PrintFunctionsConsumer();
}
bool ParseArgs(const CompilerInstance &amp;CI,
const std::vector&lt;std::string>&amp; args) {
for (unsigned i = 0, e = args.size(); i != e; ++i) {
llvm::errs() &lt;&lt; "PrintFunctionNames arg = " &lt;&lt; args[i] &lt;&lt; "\n";
// Example error handling.
if (args[i] == "-an-error") {
DiagnosticsEngine &amp;D = CI.getDiagnostics();
unsigned DiagID = D.getCustomDiagID(
DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'");
D.Report(DiagID);
return false;
}
}
if (args.size() &amp;&amp; args[0] == "help")
PrintHelp(llvm::errs());
return true;
}
void PrintHelp(llvm::raw_ostream&amp; ros) {
ros &lt;&lt; "Help for PrintFunctionNames plugin goes here\n";
}
};
}
static FrontendPluginRegistry::Add&lt;PrintFunctionNamesAction>
X("print-fns", "print function names");
</pre>
<!-- ======================================================================= -->
<h2 id="running">Running the plugin</h2>
<!-- ======================================================================= -->
<p>To run a plugin, the dynamic library containing the plugin registry must be
loaded via the -load command line option. This will load all plugins that are
registered, and you can select the plugins to run by specifying the -plugin
option. Additional parameters for the plugins can be passed with -plugin-arg-&lt;plugin-name>.</p>
<p>Note that those options must reach clang's cc1 process. There are two
ways to do so:</p>
<ul>
<li>
Directly call the parsing process by using the -cc1 option; this has the
downside of not configuring the default header search paths, so you'll need to
specify the full system path configuration on the command line.
</li>
<li>
Use clang as usual, but prefix all arguments to the cc1 process with -Xclang.
</li>
</ul>
<p>For example, to run the print-function-names plugin over a source file in clang,
first build the plugin, and then call clang with the plugin from the source tree:</p>
<pre>
$ export BD=/path/to/build/directory
$ (cd $BD &amp;&amp; make PrintFunctionNames )
$ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \
-I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \
tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \
-Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \
-plugin -Xclang print-fns
</pre>
<p>Also see the print-function-names plugin example's
<a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/README.txt?view=markup">README</a>.</p>
</div>
</body>
</html>

150
docs/ClangPlugins.rst Normal file
View File

@ -0,0 +1,150 @@
=============
Clang Plugins
=============
Clang Plugins make it possible to run extra user defined actions during a
compilation. This document will provide a basic walkthrough of how to write and
run a Clang Plugin.
Introduction
============
Clang Plugins run FrontendActions over code. See the :doc:`FrontendAction
tutorial <RAVFrontendAction>` on how to write a ``FrontendAction`` using the
``RecursiveASTVisitor``. In this tutorial, we'll demonstrate how to write a
simple clang plugin.
Writing a ``PluginASTAction``
=============================
The main difference from writing normal ``FrontendActions`` is that you can
handle plugin command line options. The ``PluginASTAction`` base class declares
a ``ParseArgs`` method which you have to implement in your plugin.
.. code-block:: c++
bool ParseArgs(const CompilerInstance &CI,
const std::vector<std::string>& args) {
for (unsigned i = 0, e = args.size(); i != e; ++i) {
if (args[i] == "-some-arg") {
// Handle the command line argument.
}
}
return true;
}
Registering a plugin
====================
A plugin is loaded from a dynamic library at runtime by the compiler. To
register a plugin in a library, use ``FrontendPluginRegistry::Add<>``:
.. code-block:: c++
static FrontendPluginRegistry::Add<MyPlugin> X("my-plugin-name", "my plugin description");
Putting it all together
=======================
Let's look at an example plugin that prints top-level function names. This
example is also checked into the clang repository; please also take a look at
the latest `checked in version of PrintFunctionNames.cpp
<http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/PrintFunctionNames.cpp?view=markup>`_.
.. code-block:: c++
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/AST.h"
#include "clang/Frontend/CompilerInstance.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
namespace {
class PrintFunctionsConsumer : public ASTConsumer {
public:
virtual bool HandleTopLevelDecl(DeclGroupRef DG) {
for (DeclGroupRef::iterator i = DG.begin(), e = DG.end(); i != e; ++i) {
const Decl *D = *i;
if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
llvm::errs() << "top-level-decl: \"" << ND->getNameAsString() << "\"\n";
}
return true;
}
};
class PrintFunctionNamesAction : public PluginASTAction {
protected:
ASTConsumer *CreateASTConsumer(CompilerInstance &CI, llvm::StringRef) {
return new PrintFunctionsConsumer();
}
bool ParseArgs(const CompilerInstance &CI,
const std::vector<std::string>& args) {
for (unsigned i = 0, e = args.size(); i != e; ++i) {
llvm::errs() << "PrintFunctionNames arg = " << args[i] << "\n";
// Example error handling.
if (args[i] == "-an-error") {
DiagnosticsEngine &D = CI.getDiagnostics();
unsigned DiagID = D.getCustomDiagID(
DiagnosticsEngine::Error, "invalid argument '" + args[i] + "'");
D.Report(DiagID);
return false;
}
}
if (args.size() && args[0] == "help")
PrintHelp(llvm::errs());
return true;
}
void PrintHelp(llvm::raw_ostream& ros) {
ros << "Help for PrintFunctionNames plugin goes here\n";
}
};
}
static FrontendPluginRegistry::Add<PrintFunctionNamesAction>
X("print-fns", "print function names");
Running the plugin
==================
To run a plugin, the dynamic library containing the plugin registry must be
loaded via the :option:`-load` command line option. This will load all plugins
that are registered, and you can select the plugins to run by specifying the
:option:`-plugin` option. Additional parameters for the plugins can be passed with
``-plugin-arg-<plugin-name>``.
Note that those options must reach clang's cc1 process. There are two
ways to do so:
* Directly call the parsing process by using the :option:`-cc1` option; this
has the downside of not configuring the default header search paths, so
you'll need to specify the full system path configuration on the command
line.
* Use clang as usual, but prefix all arguments to the cc1 process with
:option:`-Xclang`.
For example, to run the ``print-function-names`` plugin over a source file in
clang, first build the plugin, and then call clang with the plugin from the
source tree:
.. code-block:: console
$ export BD=/path/to/build/directory
$ (cd $BD && make PrintFunctionNames )
$ clang++ -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -D_GNU_SOURCE \
-I$BD/tools/clang/include -Itools/clang/include -I$BD/include -Iinclude \
tools/clang/tools/clang-check/ClangCheck.cpp -fsyntax-only \
-Xclang -load -Xclang $BD/lib/PrintFunctionNames.so -Xclang \
-plugin -Xclang print-fns
Also see the print-function-names plugin example's
`README <http://llvm.org/viewvc/llvm-project/cfe/trunk/examples/PrintFunctionNames/README.txt?view=markup>`_.

View File

@ -1,110 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Clang Tools</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Clang Tools</h1>
<p>Clang Tools are standalone command line (and potentially GUI) tools designed
for use by C++ developers who are already using and enjoying Clang as their
compiler. These tools provide developer-oriented functionality such as fast
syntax checking, automatic formatting, refactoring, etc.</p>
<p>Only a couple of the most basic and fundamental tools are kept in the primary
Clang Subversion project. The rest of the tools are kept in a side-project so
that developers who don't want or need to build them don't. If you want to get
access to the extra Clang Tools repository, simply check it out into the tools
tree of your Clang checkout and follow the usual process for building and
working with a combined LLVM/Clang checkout:</p>
<ul>
<li>With Subversion:
<ul>
<li><tt>cd llvm/tools/clang/tools</tt></li>
<li><tt>svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk
extra</tt></li>
</ul>
</li>
<li>Or with Git:
<ul>
<li><tt>cd llvm/tools/clang/tools</tt></li>
<li><tt>git clone http://llvm.org/git/clang-tools-extra.git extra</tt></li>
</ul>
</li>
</ul>
<p>This document describes a high-level overview of the organization of Clang
Tools within the project as well as giving an introduction to some of the more
important tools. However, it should be noted that this document is currently
focused on Clang and Clang Tool developers, not on end users of these tools.</p>
<!-- ======================================================================= -->
<h2 id="org">Clang Tools Organization</h2>
<!-- ======================================================================= -->
<p>Clang Tools are CLI or GUI programs that are intended to be directly used by
C++ developers. That is, they are <em>not</em> primarily for use by Clang
developers, although they are hopefully useful to C++ developers who happen to
work on Clang, and we try to actively dogfood their functionality. They are
developed in three components: the underlying infrastructure for building
a standalone tool based on Clang, core shared logic used by many different tools
in the form of refactoring and rewriting libraries, and the tools
themselves.</p>
<p>The underlying infrastructure for Clang Tools is the
<a href="LibTooling.html">LibTooling</a> platform. See its documentation for
much more detailed information about how this infrastructure works. The common
refactoring and rewriting toolkit-style library is also part of LibTooling
organizationally.</p>
<p>A few Clang Tools are developed alongside the core Clang libraries as
examples and test cases of fundamental functionality. However, most of the tools
are developed in a side repository to provide easy separation from the core
libraries. We intentionally do not support public libraries in the side
repository, as we want to carefully review and find good APIs for libraries as
they are lifted out of a few tools and into the core Clang library set.</p>
<p>Regardless of which repository Clang Tools' code resides in, the development
process and practices for all Clang Tools are exactly those of Clang itself.
They are entirely within the Clang <em>project</em>, regardless of the version
control scheme.</p>
<!-- ======================================================================= -->
<h2 id="coretools">Core Clang Tools</h2>
<!-- ======================================================================= -->
<p>The core set of Clang tools that are within the main repository are tools
that very specifically complement, and allow use and testing of <em>Clang</em>
specific functionality.</p>
<h3 id="clang-check"><tt>clang-check</tt></h3>
<p>This tool combines the LibTooling framework for running a Clang tool with the
basic Clang diagnostics by syntax checking specific files in a fast, command
line interface. It can also accept flags to re-display the diagnostics in
different formats with different flags, suitable for use driving an IDE or
editor. Furthermore, it can be used in fixit-mode to directly apply fixit-hints
offered by clang.</p>
<p>FIXME: Link to user-oriented clang-check documentation.</p>
<!-- ======================================================================= -->
<h2 id="extratools">Extra Clang Tools</h2>
<!-- ======================================================================= -->
<p>As various categories of Clang Tools are added to the extra repository,
they'll be tracked here. The focus of this documentation is on the scope and
features of the tools for other tool developers; each tool should provide its
own user-focused documentation.</p>
</div>
</body>
</html>

152
docs/ClangTools.rst Normal file
View File

@ -0,0 +1,152 @@
========
Overview
========
Clang Tools are standalone command line (and potentially GUI) tools
designed for use by C++ developers who are already using and enjoying
Clang as their compiler. These tools provide developer-oriented
functionality such as fast syntax checking, automatic formatting,
refactoring, etc.
Only a couple of the most basic and fundamental tools are kept in the
primary Clang Subversion project. The rest of the tools are kept in a
side-project so that developers who don't want or need to build them
don't. If you want to get access to the extra Clang Tools repository,
simply check it out into the tools tree of your Clang checkout and
follow the usual process for building and working with a combined
LLVM/Clang checkout:
- With Subversion:
- ``cd llvm/tools/clang/tools``
- ``svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra``
- Or with Git:
- ``cd llvm/tools/clang/tools``
- ``git clone http://llvm.org/git/clang-tools-extra.git extra``
This document describes a high-level overview of the organization of
Clang Tools within the project as well as giving an introduction to some
of the more important tools. However, it should be noted that this
document is currently focused on Clang and Clang Tool developers, not on
end users of these tools.
Clang Tools Organization
========================
Clang Tools are CLI or GUI programs that are intended to be directly
used by C++ developers. That is, they are *not* primarily for use by
Clang developers, although they are hopefully useful to C++ developers
who happen to work on Clang, and we try to actively dogfood their
functionality. They are developed in three components: the underlying
infrastructure for building a standalone tool based on Clang, core
shared logic used by many different tools in the form of refactoring and
rewriting libraries, and the tools themselves.
The underlying infrastructure for Clang Tools is the
:doc:`LibTooling <LibTooling>` platform. See its documentation for much
more detailed information about how this infrastructure works. The
common refactoring and rewriting toolkit-style library is also part of
LibTooling organizationally.
A few Clang Tools are developed alongside the core Clang libraries as
examples and test cases of fundamental functionality. However, most of
the tools are developed in a side repository to provide easy separation
from the core libraries. We intentionally do not support public
libraries in the side repository, as we want to carefully review and
find good APIs for libraries as they are lifted out of a few tools and
into the core Clang library set.
Regardless of which repository Clang Tools' code resides in, the
development process and practices for all Clang Tools are exactly those
of Clang itself. They are entirely within the Clang *project*,
regardless of the version control scheme.
Core Clang Tools
================
The core set of Clang tools that are within the main repository are
tools that very specifically complement, and allow use and testing of
*Clang* specific functionality.
``clang-check``
---------------
:doc:`ClangCheck` combines the LibTooling framework for running a
Clang tool with the basic Clang diagnostics by syntax checking specific files
in a fast, command line interface. It can also accept flags to re-display the
diagnostics in different formats with different flags, suitable for use driving
an IDE or editor. Furthermore, it can be used in fixit-mode to directly apply
fixit-hints offered by clang. See :doc:`HowToSetupToolingForLLVM` for
instructions on how to set up and use ``clang-check``.
``clang-format``
~~~~~~~~~~~~~~~~
Clang-format is both a :doc:`library <LibFormat>` and a :doc:`stand-alone tool
<ClangFormat>` with the goal of automatically reformatting C++ source files
according to configurable style guides. To do so, clang-format uses Clang's
``Lexer`` to transform an input file into a token stream and then changes all
the whitespace around those tokens. The goal is for clang-format to serve
both as a user tool (ideally with powerful IDE integrations) and part of other
refactoring tools, e.g. to do a reformatting of all the lines changed during a
renaming.
``cpp11-migrate``
~~~~~~~~~~~~~~~~~
``cpp11-migrate`` migrates C++ code to use C++11 features where appropriate.
Currently it can:
* convert loops to range-based for loops;
* convert null pointer constants (like ``NULL`` or ``0``) to C++11 ``nullptr``.
Extra Clang Tools
=================
As various categories of Clang Tools are added to the extra repository,
they'll be tracked here. The focus of this documentation is on the scope
and features of the tools for other tool developers; each tool should
provide its own user-focused documentation.
Ideas for new Tools
===================
* C++ cast conversion tool. Will convert C-style casts (``(type) value``) to
appropriate C++ cast (``static_cast``, ``const_cast`` or
``reinterpret_cast``).
* Non-member ``begin()`` and ``end()`` conversion tool. Will convert
``foo.begin()`` into ``begin(foo)`` and similarly for ``end()``, where
``foo`` is a standard container. We could also detect similar patterns for
arrays.
* ``tr1`` removal tool. Will migrate source code from using TR1 library
features to C++11 library. For example:
.. code-block:: c++
#include <tr1/unordered_map>
int main()
{
std::tr1::unordered_map <int, int> ma;
std::cout << ma.size () << std::endl;
return 0;
}
should be rewritten to:
.. code-block:: c++
#include <unordered_map>
int main()
{
std::unordered_map <int, int> ma;
std::cout << ma.size () << std::endl;
return 0;
}
* A tool to remove ``auto``. Will convert ``auto`` to an explicit type or add
comments with deduced types. The motivation is that there are developers
that don't want to use ``auto`` because they are afraid that they might lose
control over their code.

View File

@ -1,523 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Clang Driver Manual</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Driver Design &amp; Internals</h1>
<ul>
<li><a href="#intro">Introduction</a></li>
<li><a href="#features">Features and Goals</a>
<ul>
<li><a href="#gcccompat">GCC Compatibility</a></li>
<li><a href="#components">Flexible</a></li>
<li><a href="#performance">Low Overhead</a></li>
<li><a href="#simple">Simple</a></li>
</ul>
</li>
<li><a href="#design">Design</a>
<ul>
<li><a href="#int_intro">Internals Introduction</a></li>
<li><a href="#int_overview">Design Overview</a></li>
<li><a href="#int_notes">Additional Notes</a>
<ul>
<li><a href="#int_compilation">The Compilation Object</a></li>
<li><a href="#int_unified_parsing">Unified Parsing &amp; Pipelining</a></li>
<li><a href="#int_toolchain_translation">ToolChain Argument Translation</a></li>
<li><a href="#int_unused_warnings">Unused Argument Warnings</a></li>
</ul>
</li>
<li><a href="#int_gcc_concepts">Relation to GCC Driver Concepts</a></li>
</ul>
</li>
</ul>
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
<p>This document describes the Clang driver. The purpose of this
document is to describe both the motivation and design goals
for the driver, as well as details of the internal
implementation.</p>
<!-- ======================================================================= -->
<h2 id="features">Features and Goals</h2>
<!-- ======================================================================= -->
<p>The Clang driver is intended to be a production quality
compiler driver providing access to the Clang compiler and
tools, with a command line interface which is compatible with
the gcc driver.</p>
<p>Although the driver is part of and driven by the Clang
project, it is logically a separate tool which shares many of
the same goals as Clang:</p>
<p><b>Features</b>:</p>
<ul>
<li><a href="#gcccompat">GCC Compatibility</a></li>
<li><a href="#components">Flexible</a></li>
<li><a href="#performance">Low Overhead</a></li>
<li><a href="#simple">Simple</a></li>
</ul>
<!--=======================================================================-->
<h3 id="gcccompat">GCC Compatibility</h3>
<!--=======================================================================-->
<p>The number one goal of the driver is to ease the adoption of
Clang by allowing users to drop Clang into a build system
which was designed to call GCC. Although this makes the driver
much more complicated than might otherwise be necessary, we
decided that being very compatible with the gcc command line
interface was worth it in order to allow users to quickly test
clang on their projects.</p>
<!--=======================================================================-->
<h3 id="components">Flexible</h3>
<!--=======================================================================-->
<p>The driver was designed to be flexible and easily accommodate
new uses as we grow the clang and LLVM infrastructure. As one
example, the driver can easily support the introduction of
tools which have an integrated assembler; something we hope to
add to LLVM in the future.</p>
<p>Similarly, most of the driver functionality is kept in a
library which can be used to build other tools which want to
implement or accept a gcc-like interface.</p>
<!--=======================================================================-->
<h3 id="performance">Low Overhead</h3>
<!--=======================================================================-->
<p>The driver should have as little overhead as possible. In
practice, we found that the gcc driver by itself incurred a
small but meaningful overhead when compiling many small
files. The driver doesn't do much work compared to a
compilation, but we have tried to keep it as efficient as
possible by following a few simple principles:</p>
<ul>
<li>Avoid memory allocation and string copying when
possible.</li>
<li>Don't parse arguments more than once.</li>
<li>Provide a few simple interfaces for efficiently searching
arguments.</li>
</ul>
<!--=======================================================================-->
<h3 id="simple">Simple</h3>
<!--=======================================================================-->
<p>Finally, the driver was designed to be "as simple as
possible", given the other goals. Notably, trying to be
completely compatible with the gcc driver adds a significant
amount of complexity. However, the design of the driver
attempts to mitigate this complexity by dividing the process
into a number of independent stages instead of a single
monolithic task.</p>
<!-- ======================================================================= -->
<h2 id="design">Internal Design and Implementation</h2>
<!-- ======================================================================= -->
<ul>
<li><a href="#int_intro">Internals Introduction</a></li>
<li><a href="#int_overview">Design Overview</a></li>
<li><a href="#int_notes">Additional Notes</a></li>
<li><a href="#int_gcc_concepts">Relation to GCC Driver Concepts</a></li>
</ul>
<!--=======================================================================-->
<h3><a name="int_intro">Internals Introduction</a></h3>
<!--=======================================================================-->
<p>In order to satisfy the stated goals, the driver was designed
to completely subsume the functionality of the gcc executable;
that is, the driver should not need to delegate to gcc to
perform subtasks. On Darwin, this implies that the Clang
driver also subsumes the gcc driver-driver, which is used to
implement support for building universal images (binaries and
object files). This also implies that the driver should be
able to call the language specific compilers (e.g. cc1)
directly, which means that it must have enough information to
forward command line arguments to child processes
correctly.</p>
<!--=======================================================================-->
<h3><a name="int_overview">Design Overview</a></h3>
<!--=======================================================================-->
<p>The diagram below shows the significant components of the
driver architecture and how they relate to one another. The
orange components represent concrete data structures built by
the driver, the green components indicate conceptually
distinct stages which manipulate these data structures, and
the blue components are important helper classes. </p>
<div style="text-align:center">
<a href="DriverArchitecture.png">
<img width=400 src="DriverArchitecture.png"
alt="Driver Architecture Diagram">
</a>
</div>
<!--=======================================================================-->
<h3><a name="int_stages">Driver Stages</a></h3>
<!--=======================================================================-->
<p>The driver functionality is conceptually divided into five stages:</p>
<ol>
<li>
<b>Parse: Option Parsing</b>
<p>The command line argument strings are decomposed into
arguments (<tt>Arg</tt> instances). The driver expects to
understand all available options, although there is some
facility for just passing certain classes of options
through (like <tt>-Wl,</tt>).</p>
<p>Each argument corresponds to exactly one
abstract <tt>Option</tt> definition, which describes how
the option is parsed along with some additional
metadata. The Arg instances themselves are lightweight and
merely contain enough information for clients to determine
which option they correspond to and their values (if they
have additional parameters).</p>
<p>For example, a command line like "-Ifoo -I foo" would
parse to two Arg instances (a JoinedArg and a SeparateArg
instance), but each would refer to the same Option.</p>
<p>Options are lazily created in order to avoid populating
all Option classes when the driver is loaded. Most of the
driver code only needs to deal with options by their
unique ID (e.g., <tt>options::OPT_I</tt>).</p>
<p>Arg instances themselves do not generally store the
values of parameters. In many cases, this would
simply result in creating unnecessary string
copies. Instead, Arg instances are always embedded inside
an ArgList structure, which contains the original vector
of argument strings. Each Arg itself only needs to contain
an index into this vector instead of storing its values
directly.</p>
<p>The clang driver can dump the results of this
stage using the <tt>-ccc-print-options</tt> flag (which
must precede any actual command line arguments). For
example:</p>
<pre>
$ <b>clang -ccc-print-options -Xarch_i386 -fomit-frame-pointer -Wa,-fast -Ifoo -I foo t.c</b>
Option 0 - Name: "-Xarch_", Values: {"i386", "-fomit-frame-pointer"}
Option 1 - Name: "-Wa,", Values: {"-fast"}
Option 2 - Name: "-I", Values: {"foo"}
Option 3 - Name: "-I", Values: {"foo"}
Option 4 - Name: "&lt;input&gt;", Values: {"t.c"}
</pre>
<p>After this stage is complete the command line should be
broken down into well defined option objects with their
appropriate parameters. Subsequent stages should rarely,
if ever, need to do any string processing.</p>
</li>
<li>
<b>Pipeline: Compilation Job Construction</b>
<p>Once the arguments are parsed, the tree of subprocess
jobs needed for the desired compilation sequence is
constructed. This involves determining the input files and
their types, what work is to be done on them (preprocess,
compile, assemble, link, etc.), and constructing a list of
Action instances for each task. The result is a list of
one or more top-level actions, each of which generally
corresponds to a single output (for example, an object or
linked executable).</p>
<p>The majority of Actions correspond to actual tasks,
however there are two special Actions. The first is
InputAction, which simply serves to adapt an input
argument for use as an input to other Actions. The second
is BindArchAction, which conceptually alters the
architecture to be used for all of its input Actions.</p>
<p>The clang driver can dump the results of this
stage using the <tt>-ccc-print-phases</tt> flag. For
example:</p>
<pre>
$ <b>clang -ccc-print-phases -x c t.c -x assembler t.s</b>
0: input, "t.c", c
1: preprocessor, {0}, cpp-output
2: compiler, {1}, assembler
3: assembler, {2}, object
4: input, "t.s", assembler
5: assembler, {4}, object
6: linker, {3, 5}, image
</pre>
<p>Here the driver is constructing seven distinct actions,
four to compile the "t.c" input into an object file, two to
assemble the "t.s" input, and one to link them together.</p>
<p>A rather different compilation pipeline is shown here; in
this example there are two top level actions to compile
the input files into two separate object files, where each
object file is built using <tt>lipo</tt> to merge results
built for two separate architectures.</p>
<pre>
$ <b>clang -ccc-print-phases -c -arch i386 -arch x86_64 t0.c t1.c</b>
0: input, "t0.c", c
1: preprocessor, {0}, cpp-output
2: compiler, {1}, assembler
3: assembler, {2}, object
4: bind-arch, "i386", {3}, object
5: bind-arch, "x86_64", {3}, object
6: lipo, {4, 5}, object
7: input, "t1.c", c
8: preprocessor, {7}, cpp-output
9: compiler, {8}, assembler
10: assembler, {9}, object
11: bind-arch, "i386", {10}, object
12: bind-arch, "x86_64", {10}, object
13: lipo, {11, 12}, object
</pre>
<p>After this stage is complete the compilation process is
divided into a simple set of actions which need to be
performed to produce intermediate or final outputs (in
some cases, like <tt>-fsyntax-only</tt>, there is no
"real" final output). Phases are well known compilation
steps, such as "preprocess", "compile", "assemble",
"link", etc.</p>
</li>
<li>
<b>Bind: Tool &amp; Filename Selection</b>
<p>This stage (in conjunction with the Translate stage)
turns the tree of Actions into a list of actual subprocesses
to run. Conceptually, the driver performs a top down
matching to assign Action(s) to Tools. The ToolChain is
responsible for selecting the tool to perform a particular
action; once selected the driver interacts with the tool
to see if it can match additional actions (for example, by
having an integrated preprocessor).</p>
<p>Once Tools have been selected for all actions, the driver
determines how the tools should be connected (for example,
using an inprocess module, pipes, temporary files, or user
provided filenames). If an output file is required, the
driver also computes the appropriate file name (the suffix
and file location depend on the input types and options
such as <tt>-save-temps</tt>).</p>
<p>The driver interacts with a ToolChain to perform the Tool
bindings. Each ToolChain contains information about all
the tools needed for compilation for a particular
architecture, platform, and operating system. A single
driver invocation may query multiple ToolChains during one
compilation in order to interact with tools for separate
architectures.</p>
<p>The results of this stage are not computed directly, but
the driver can print the results via
the <tt>-ccc-print-bindings</tt> option. For example:</p>
<pre>
$ <b>clang -ccc-print-bindings -arch i386 -arch ppc t0.c</b>
# "i386-apple-darwin9" - "clang", inputs: ["t0.c"], output: "/tmp/cc-Sn4RKF.s"
# "i386-apple-darwin9" - "darwin::Assemble", inputs: ["/tmp/cc-Sn4RKF.s"], output: "/tmp/cc-gvSnbS.o"
# "i386-apple-darwin9" - "darwin::Link", inputs: ["/tmp/cc-gvSnbS.o"], output: "/tmp/cc-jgHQxi.out"
# "ppc-apple-darwin9" - "gcc::Compile", inputs: ["t0.c"], output: "/tmp/cc-Q0bTox.s"
# "ppc-apple-darwin9" - "gcc::Assemble", inputs: ["/tmp/cc-Q0bTox.s"], output: "/tmp/cc-WCdicw.o"
# "ppc-apple-darwin9" - "gcc::Link", inputs: ["/tmp/cc-WCdicw.o"], output: "/tmp/cc-HHBEBh.out"
# "i386-apple-darwin9" - "darwin::Lipo", inputs: ["/tmp/cc-jgHQxi.out", "/tmp/cc-HHBEBh.out"], output: "a.out"
</pre>
<p>This shows the tool chain, tool, inputs and outputs which
have been bound for this compilation sequence. Here clang
is being used to compile t0.c on the i386 architecture and
darwin specific versions of the tools are being used to
assemble and link the result, but generic gcc versions of
the tools are being used on PowerPC.</p>
</li>
<li>
<b>Translate: Tool Specific Argument Translation</b>
<p>Once a Tool has been selected to perform a particular
Action, the Tool must construct concrete Jobs which will be
executed during compilation. The main work is in translating
from the gcc style command line options to whatever options
the subprocess expects.</p>
<p>Some tools, such as the assembler, only interact with a
handful of arguments and just determine the path of the
executable to call and pass on their input and output
arguments. Others, like the compiler or the linker, may
translate a large number of arguments in addition.</p>
<p>The ArgList class provides a number of simple helper
methods to assist with translating arguments; for example,
to pass on only the last of the arguments corresponding to some
option, or all arguments for an option.</p>
<p>The result of this stage is a list of Jobs (executable
paths and argument strings) to execute.</p>
</li>
<li>
<b>Execute</b>
<p>Finally, the compilation pipeline is executed. This is
mostly straightforward, although there is some interaction
with options
like <tt>-pipe</tt>, <tt>-pass-exit-codes</tt>
and <tt>-time</tt>.</p>
</li>
</ol>
<!--=======================================================================-->
<h3><a name="int_notes">Additional Notes</a></h3>
<!--=======================================================================-->
<h4 id="int_compilation">The Compilation Object</h4>
<p>The driver constructs a Compilation object for each set of
command line arguments. The Driver itself is intended to be
invariant during construction of a Compilation; an IDE should be
able to construct a single long lived driver instance to use
for an entire build, for example.</p>
<p>The Compilation object holds information that is particular
to each compilation sequence. For example, the list of used
temporary files (which must be removed once compilation is
finished) and result files (which should be removed if
compilation fails).</p>
<h4 id="int_unified_parsing">Unified Parsing &amp; Pipelining</h4>
<p>Parsing and pipelining both occur without reference to a
Compilation instance. This is by design; the driver expects that
both of these phases are platform neutral, with a few very well
defined exceptions such as whether the platform uses a driver
driver.</p>
<h4 id="int_toolchain_translation">ToolChain Argument Translation</h4>
<p>In order to match gcc very closely, the clang driver
currently allows tool chains to perform their own translation of
the argument list (into a new ArgList data structure). Although
this allows the clang driver to match gcc easily, it also makes
the driver operation much harder to understand (since the Tools
stop seeing some arguments the user provided, and see new ones
instead).</p>
<p>For example, on Darwin <tt>-gfull</tt> gets translated into two
separate arguments, <tt>-g</tt>
and <tt>-fno-eliminate-unused-debug-symbols</tt>. Trying to write Tool
logic to do something with <tt>-gfull</tt> will not work, because Tool
argument translation is done after the arguments have been
translated.</p>
<p>A long term goal is to remove this tool chain specific
translation, and instead force each tool to change its own logic
to do the right thing on the untranslated original arguments.</p>
<h4 id="int_unused_warnings">Unused Argument Warnings</h4>
<p>The driver operates by parsing all arguments but giving Tools
the opportunity to choose which arguments to pass on. One
downside of this infrastructure is that if the user misspells
some option, or is confused about which options to use, some
command line arguments the user really cared about may go
unused. This problem is particularly important when using
clang as a compiler, since the clang compiler does not support
anywhere near all the options that gcc does, and we want to make
sure users know which ones are being used.</p>
<p>To support this, the driver maintains a bit associated with
each argument of whether it has been used (at all) during the
compilation. This bit usually doesn't need to be set by hand,
as the key ArgList accessors will set it automatically.</p>
<p>When a compilation is successful (there are no errors), the
driver checks the bit and emits an "unused argument" warning for
any arguments which were never accessed. This is conservative
(the argument may not have been used to do what the user wanted)
but still catches the most obvious cases.</p>
<!--=======================================================================-->
<h3><a name="int_gcc_concepts">Relation to GCC Driver Concepts</a></h3>
<!--=======================================================================-->
<p>For those familiar with the gcc driver, this section provides
a brief overview of how things from the gcc driver map to the
clang driver.</p>
<ul>
<li>
<b>Driver Driver</b>
<p>The driver driver is fully integrated into the clang
driver. The driver simply constructs additional Actions to
bind the architecture during the <i>Pipeline</i>
phase. The tool chain specific argument translation is
responsible for handling <tt>-Xarch_</tt>.</p>
<p>The one caveat is that this approach
requires <tt>-Xarch_</tt> not be used to alter the
compilation itself (for example, one cannot
provide <tt>-S</tt> as an <tt>-Xarch_</tt> argument). The
driver attempts to reject such invocations, and overall
there isn't a good reason to abuse <tt>-Xarch_</tt> to
that end in practice.</p>
<p>The upside is that the clang driver is more efficient and
does little extra work to support universal builds. It also
provides better error reporting and UI consistency.</p>
</li>
<li>
<b>Specs</b>
<p>The clang driver has no direct correspondent for
"specs". The majority of the functionality that is
embedded in specs is in the Tool specific argument
translation routines. The parts of specs which control the
compilation pipeline are generally part of
the <i>Pipeline</i> stage.</p>
</li>
<li>
<b>Toolchains</b>
<p>The gcc driver has no direct understanding of tool
chains. Each gcc binary roughly corresponds to the
information which is embedded inside a single
ToolChain.</p>
<p>The clang driver is intended to be portable and support
complex compilation environments. All platform and tool
chain specific code should be protected behind either
abstract or well defined interfaces (such as whether the
platform supports use as a driver driver).</p>
</li>
</ul>
</div>
</body>
</html>

400
docs/DriverInternals.rst Normal file
View File

@ -0,0 +1,400 @@
=========================
Driver Design & Internals
=========================
.. contents::
:local:
Introduction
============
This document describes the Clang driver. The purpose of this document
is to describe both the motivation and design goals for the driver, as
well as details of the internal implementation.
Features and Goals
==================
The Clang driver is intended to be a production quality compiler driver
providing access to the Clang compiler and tools, with a command line
interface which is compatible with the gcc driver.
Although the driver is part of and driven by the Clang project, it is
logically a separate tool which shares many of the same goals as Clang:
.. contents:: Features
:local:
GCC Compatibility
-----------------
The number one goal of the driver is to ease the adoption of Clang by
allowing users to drop Clang into a build system which was designed to
call GCC. Although this makes the driver much more complicated than
might otherwise be necessary, we decided that being very compatible with
the gcc command line interface was worth it in order to allow users to
quickly test clang on their projects.
Flexible
--------
The driver was designed to be flexible and easily accommodate new uses
as we grow the clang and LLVM infrastructure. As one example, the driver
can easily support the introduction of tools which have an integrated
assembler; something we hope to add to LLVM in the future.
Similarly, most of the driver functionality is kept in a library which
can be used to build other tools which want to implement or accept a gcc
like interface.
Low Overhead
------------
The driver should have as little overhead as possible. In practice, we
found that the gcc driver by itself incurred a small but meaningful
overhead when compiling many small files. The driver doesn't do much
work compared to a compilation, but we have tried to keep it as
efficient as possible by following a few simple principles:
- Avoid memory allocation and string copying when possible.
- Don't parse arguments more than once.
- Provide a few simple interfaces for efficiently searching arguments.
Simple
------
Finally, the driver was designed to be "as simple as possible", given
the other goals. Notably, trying to be completely compatible with the
gcc driver adds a significant amount of complexity. However, the design
of the driver attempts to mitigate this complexity by dividing the
process into a number of independent stages instead of a single
monolithic task.
Internal Design and Implementation
==================================
.. contents::
:local:
:depth: 1
Internals Introduction
----------------------
In order to satisfy the stated goals, the driver was designed to
completely subsume the functionality of the gcc executable; that is, the
driver should not need to delegate to gcc to perform subtasks. On
Darwin, this implies that the Clang driver also subsumes the gcc
driver-driver, which is used to implement support for building universal
images (binaries and object files). This also implies that the driver
should be able to call the language specific compilers (e.g. cc1)
directly, which means that it must have enough information to forward
command line arguments to child processes correctly.
Design Overview
---------------
The diagram below shows the significant components of the driver
architecture and how they relate to one another. The orange components
represent concrete data structures built by the driver, the green
components indicate conceptually distinct stages which manipulate these
data structures, and the blue components are important helper classes.
.. image:: DriverArchitecture.png
:align: center
:alt: Driver Architecture Diagram
Driver Stages
-------------
The driver functionality is conceptually divided into five stages:
#. **Parse: Option Parsing**
The command line argument strings are decomposed into arguments
(``Arg`` instances). The driver expects to understand all available
options, although there is some facility for just passing certain
classes of options through (like ``-Wl,``).
Each argument corresponds to exactly one abstract ``Option``
definition, which describes how the option is parsed along with some
additional metadata. The Arg instances themselves are lightweight and
merely contain enough information for clients to determine which
option they correspond to and their values (if they have additional
parameters).
For example, a command line like "-Ifoo -I foo" would parse to two
Arg instances (a JoinedArg and a SeparateArg instance), but each
would refer to the same Option.
Options are lazily created in order to avoid populating all Option
classes when the driver is loaded. Most of the driver code only needs
to deal with options by their unique ID (e.g., ``options::OPT_I``).
Arg instances themselves do not generally store the values of
parameters. In many cases, this would simply result in creating
unnecessary string copies. Instead, Arg instances are always embedded
inside an ArgList structure, which contains the original vector of
argument strings. Each Arg itself only needs to contain an index into
this vector instead of storing its values directly.
The clang driver can dump the results of this stage using the
``-ccc-print-options`` flag (which must precede any actual command
line arguments). For example:
.. code-block:: console
$ clang -ccc-print-options -Xarch_i386 -fomit-frame-pointer -Wa,-fast -Ifoo -I foo t.c
Option 0 - Name: "-Xarch_", Values: {"i386", "-fomit-frame-pointer"}
Option 1 - Name: "-Wa,", Values: {"-fast"}
Option 2 - Name: "-I", Values: {"foo"}
Option 3 - Name: "-I", Values: {"foo"}
Option 4 - Name: "<input>", Values: {"t.c"}
After this stage is complete the command line should be broken down
into well defined option objects with their appropriate parameters.
Subsequent stages should rarely, if ever, need to do any string
processing.
#. **Pipeline: Compilation Job Construction**
Once the arguments are parsed, the tree of subprocess jobs needed for
the desired compilation sequence is constructed. This involves
determining the input files and their types, what work is to be done
on them (preprocess, compile, assemble, link, etc.), and constructing
a list of Action instances for each task. The result is a list of one
or more top-level actions, each of which generally corresponds to a
single output (for example, an object or linked executable).
The majority of Actions correspond to actual tasks; however, there are
two special Actions. The first is InputAction, which simply serves to
adapt an input argument for use as an input to other Actions. The
second is BindArchAction, which conceptually alters the architecture
to be used for all of its input Actions.
The clang driver can dump the results of this stage using the
``-ccc-print-phases`` flag. For example:
.. code-block:: console
$ clang -ccc-print-phases -x c t.c -x assembler t.s
0: input, "t.c", c
1: preprocessor, {0}, cpp-output
2: compiler, {1}, assembler
3: assembler, {2}, object
4: input, "t.s", assembler
5: assembler, {4}, object
6: linker, {3, 5}, image
Here the driver is constructing seven distinct actions, four to
compile the "t.c" input into an object file, two to assemble the
"t.s" input, and one to link them together.
A rather different compilation pipeline is shown here; in this
example there are two top level actions to compile the input files
into two separate object files, where each object file is built using
``lipo`` to merge results built for two separate architectures.
.. code-block:: console
$ clang -ccc-print-phases -c -arch i386 -arch x86_64 t0.c t1.c
0: input, "t0.c", c
1: preprocessor, {0}, cpp-output
2: compiler, {1}, assembler
3: assembler, {2}, object
4: bind-arch, "i386", {3}, object
5: bind-arch, "x86_64", {3}, object
6: lipo, {4, 5}, object
7: input, "t1.c", c
8: preprocessor, {7}, cpp-output
9: compiler, {8}, assembler
10: assembler, {9}, object
11: bind-arch, "i386", {10}, object
12: bind-arch, "x86_64", {10}, object
13: lipo, {11, 12}, object
After this stage is complete the compilation process is divided into
a simple set of actions which need to be performed to produce
intermediate or final outputs (in some cases, like ``-fsyntax-only``,
there is no "real" final output). Phases are well known compilation
steps, such as "preprocess", "compile", "assemble", "link", etc.
#. **Bind: Tool & Filename Selection**
This stage (in conjunction with the Translate stage) turns the tree
of Actions into a list of actual subprocesses to run. Conceptually, the
driver performs a top down matching to assign Action(s) to Tools. The
ToolChain is responsible for selecting the tool to perform a
particular action; once selected the driver interacts with the tool
to see if it can match additional actions (for example, by having an
integrated preprocessor).
Once Tools have been selected for all actions, the driver determines
how the tools should be connected (for example, using an inprocess
module, pipes, temporary files, or user provided filenames). If an
output file is required, the driver also computes the appropriate
file name (the suffix and file location depend on the input types and
options such as ``-save-temps``).
The driver interacts with a ToolChain to perform the Tool bindings.
Each ToolChain contains information about all the tools needed for
compilation for a particular architecture, platform, and operating
system. A single driver invocation may query multiple ToolChains
during one compilation in order to interact with tools for separate
architectures.
The results of this stage are not computed directly, but the driver
can print the results via the ``-ccc-print-bindings`` option. For
example:
.. code-block:: console
$ clang -ccc-print-bindings -arch i386 -arch ppc t0.c
# "i386-apple-darwin9" - "clang", inputs: ["t0.c"], output: "/tmp/cc-Sn4RKF.s"
# "i386-apple-darwin9" - "darwin::Assemble", inputs: ["/tmp/cc-Sn4RKF.s"], output: "/tmp/cc-gvSnbS.o"
# "i386-apple-darwin9" - "darwin::Link", inputs: ["/tmp/cc-gvSnbS.o"], output: "/tmp/cc-jgHQxi.out"
# "ppc-apple-darwin9" - "gcc::Compile", inputs: ["t0.c"], output: "/tmp/cc-Q0bTox.s"
# "ppc-apple-darwin9" - "gcc::Assemble", inputs: ["/tmp/cc-Q0bTox.s"], output: "/tmp/cc-WCdicw.o"
# "ppc-apple-darwin9" - "gcc::Link", inputs: ["/tmp/cc-WCdicw.o"], output: "/tmp/cc-HHBEBh.out"
# "i386-apple-darwin9" - "darwin::Lipo", inputs: ["/tmp/cc-jgHQxi.out", "/tmp/cc-HHBEBh.out"], output: "a.out"
This shows the tool chain, tool, inputs and outputs which have been
bound for this compilation sequence. Here clang is being used to
compile t0.c on the i386 architecture and darwin specific versions of
the tools are being used to assemble and link the result, but generic
gcc versions of the tools are being used on PowerPC.
#. **Translate: Tool Specific Argument Translation**
Once a Tool has been selected to perform a particular Action, the
Tool must construct concrete Jobs which will be executed during
compilation. The main work is in translating from the gcc style
command line options to whatever options the subprocess expects.
Some tools, such as the assembler, only interact with a handful of
arguments and just determine the path of the executable to call and
pass on their input and output arguments. Others, like the compiler
or the linker, may translate a large number of arguments in addition.
The ArgList class provides a number of simple helper methods to
assist with translating arguments; for example, to pass on only the
last of the arguments corresponding to some option, or all arguments for
an option.
The result of this stage is a list of Jobs (executable paths and
argument strings) to execute.
#. **Execute**
Finally, the compilation pipeline is executed. This is mostly
straightforward, although there is some interaction with options like
``-pipe``, ``-pass-exit-codes`` and ``-time``.
Additional Notes
----------------
The Compilation Object
^^^^^^^^^^^^^^^^^^^^^^
The driver constructs a Compilation object for each set of command line
arguments. The Driver itself is intended to be invariant during
construction of a Compilation; an IDE should be able to construct a
single long lived driver instance to use for an entire build, for
example.
The Compilation object holds information that is particular to each
compilation sequence. For example, the list of used temporary files
(which must be removed once compilation is finished) and result files
(which should be removed if compilation fails).
Unified Parsing & Pipelining
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Parsing and pipelining both occur without reference to a Compilation
instance. This is by design; the driver expects that both of these
phases are platform neutral, with a few very well defined exceptions
such as whether the platform uses a driver driver.
ToolChain Argument Translation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In order to match gcc very closely, the clang driver currently allows
tool chains to perform their own translation of the argument list (into
a new ArgList data structure). Although this allows the clang driver to
match gcc easily, it also makes the driver operation much harder to
understand (since the Tools stop seeing some arguments the user
provided, and see new ones instead).
For example, on Darwin ``-gfull`` gets translated into two separate
arguments, ``-g`` and ``-fno-eliminate-unused-debug-symbols``. Trying to
write Tool logic to do something with ``-gfull`` will not work, because
Tool argument translation is done after the arguments have been
translated.
A long term goal is to remove this tool chain specific translation, and
instead force each tool to change its own logic to do the right thing on
the untranslated original arguments.
Unused Argument Warnings
^^^^^^^^^^^^^^^^^^^^^^^^
The driver operates by parsing all arguments but giving Tools the
opportunity to choose which arguments to pass on. One downside of this
infrastructure is that if the user misspells some option, or is confused
about which options to use, some command line arguments the user really
cared about may go unused. This problem is particularly important when
using clang as a compiler, since the clang compiler does not support
anywhere near all the options that gcc does, and we want to make sure
users know which ones are being used.
To support this, the driver maintains a bit associated with each
argument of whether it has been used (at all) during the compilation.
This bit usually doesn't need to be set by hand, as the key ArgList
accessors will set it automatically.
When a compilation is successful (there are no errors), the driver
checks the bit and emits an "unused argument" warning for any arguments
which were never accessed. This is conservative (the argument may not
have been used to do what the user wanted) but still catches the most
obvious cases.
Relation to GCC Driver Concepts
-------------------------------
For those familiar with the gcc driver, this section provides a brief
overview of how things from the gcc driver map to the clang driver.
- **Driver Driver**
The driver driver is fully integrated into the clang driver. The
driver simply constructs additional Actions to bind the architecture
during the *Pipeline* phase. The tool chain specific argument
translation is responsible for handling ``-Xarch_``.
The one caveat is that this approach requires ``-Xarch_`` not be used
to alter the compilation itself (for example, one cannot provide
``-S`` as an ``-Xarch_`` argument). The driver attempts to reject
such invocations, and overall there isn't a good reason to abuse
``-Xarch_`` to that end in practice.
The upside is that the clang driver is more efficient and does little
extra work to support universal builds. It also provides better error
reporting and UI consistency.
- **Specs**
The clang driver has no direct correspondent for "specs". The
majority of the functionality that is embedded in specs is in the
Tool specific argument translation routines. The parts of specs which
control the compilation pipeline are generally part of the *Pipeline*
stage.
- **Toolchains**
The gcc driver has no direct understanding of tool chains. Each gcc
binary roughly corresponds to the information which is embedded
inside a single ToolChain.
The clang driver is intended to be portable and support complex
compilation environments. All platform and tool chain specific code
should be protected behind either abstract or well defined interfaces
(such as whether the platform supports use as a driver driver).

View File

@ -0,0 +1,80 @@
=======================
External Clang Examples
=======================
Introduction
============
This page provides some examples of the kinds of things that people have
done with Clang that might serve as useful guides (or starting points) from
which to develop your own tools. They may be helpful even for something as
banal (but necessary) as how to set up your build to integrate Clang.
Clang's library-based design is deliberately aimed at facilitating use by
external projects, and we are always interested in improving Clang to
better serve our external users. Some typical categories of applications
where Clang is used are:
- Static analysis.
- Documentation/cross-reference generation.
If you know of (or wrote!) a tool or project using Clang, please send an
email to Clang's `development discussion mailing list
<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_ to have it added.
(Or, if you are already a Clang contributor, feel free to directly commit
additions.) Since the primary purpose of this page is to provide examples
that can help developers, generally they must have code available.
List of projects and tools
==========================
`<https://github.com/Andersbakken/rtags/>`_
"RTags is a client/server application that indexes c/c++ code and keeps
a persistent in-memory database of references, symbolnames, completions
etc."
`<http://rprichard.github.com/sourceweb/>`_
"A C/C++ source code indexer and navigator"
`<https://github.com/etaoins/qconnectlint>`_
"qconnectlint is a Clang tool for statically verifying the consistency
of signal and slot connections made with Qt's ``QObject::connect``."
`<https://github.com/woboq/woboq_codebrowser>`_
"The Woboq Code Browser is a web-based code browser for C/C++ projects.
Check out `<http://code.woboq.org/>`_ for an example!"
`<https://github.com/mozilla/dxr>`_
"DXR is a source code cross-reference tool that uses static analysis
data collected by instrumented compilers."
`<https://github.com/eschulte/clang-mutate>`_
"This tool performs a number of operations on C-language source files."
`<https://github.com/gmarpons/Crisp>`_
"A coding rule validation add-on for LLVM/clang. Crisp rules are written
in Prolog. A high-level declarative DSL to easily write new rules is under
development. It will be called CRISP, an acronym for *Coding Rules in
Sugared Prolog*."
`<https://github.com/drothlis/clang-ctags>`_
"Generate tag file for C++ source code."
`<https://github.com/exclipy/clang_indexer>`_
"This is an indexer for C and C++ based on the libclang library."
`<https://github.com/holtgrewe/linty>`_
"Linty - C/C++ Style Checking with Python & libclang."
`<https://github.com/axw/cmonster>`_
"cmonster is a Python wrapper for the Clang C++ parser."
`<https://github.com/rizsotto/Constantine>`_
"Constantine is a toy project to learn how to write clang plugin.
Implements pseudo const analysis. Generates warnings about variables,
which were declared without const qualifier."
`<https://github.com/jessevdk/cldoc>`_
"cldoc is a Clang based documentation generator for C and C++.
cldoc tries to solve the issue of writing C/C++ software documentation
with a modern, non-intrusive and robust approach."

64
docs/FAQ.rst Normal file
View File

@ -0,0 +1,64 @@
================================
Frequently Asked Questions (FAQ)
================================
.. contents::
:local:
Driver
======
I run ``clang -cc1 ...`` and get weird errors about missing headers
-------------------------------------------------------------------
Given this source file:
.. code-block:: c
#include <stdio.h>
int main() {
printf("Hello world\n");
}
If you run:
.. code-block:: console
$ clang -cc1 hello.c
hello.c:1:10: fatal error: 'stdio.h' file not found
#include <stdio.h>
^
1 error generated.
``clang -cc1`` is the frontend, ``clang`` is the :doc:`driver
<DriverInternals>`. The driver invokes the frontend with options appropriate
for your system. To see these options, run:
.. code-block:: console
$ clang -### -c hello.c
Some clang command line options are driver-only options, some are frontend-only
options. Frontend-only options are intended to be used only by clang developers.
Users should not run ``clang -cc1`` directly, because ``-cc1`` options are not
guaranteed to be stable.
If you want to use a frontend-only option ("a ``-cc1`` option"), for example
``-ast-dump``, then you need to take the ``clang -cc1`` line generated by the
driver and add the option you need. Alternatively, you can run
``clang -Xclang <option> ...`` to force the driver to pass ``<option>`` to
``clang -cc1``.
I get errors about some headers being missing (``stddef.h``, ``stdarg.h``)
--------------------------------------------------------------------------
Some header files (``stddef.h``, ``stdarg.h``, and others) are shipped with
Clang --- these are called builtin includes. Clang searches for them in a
directory relative to the location of the ``clang`` binary. If you moved the
``clang`` binary, you need to move the builtin headers, too.
More information can be found in the :ref:`libtooling_builtin_includes`
section.

View File

@ -1,212 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>How To Setup Clang Tooling For LLVM</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>How To Setup Clang Tooling For LLVM</h1>
<p>Clang Tooling provides infrastructure to write tools that need syntactic and
semantic information about a program. This term also relates to a set of specific
tools using this infrastructure (e.g. <code>clang-check</code>). This document
provides information on how to set up and use Clang Tooling for the LLVM source
code.</p>
<!-- ======================================================================= -->
<h2><a name="introduction">Introduction</a></h2>
<!-- ======================================================================= -->
<p>Clang Tooling needs a compilation database to figure out specific build
options for each file. Currently it can create a compilation database from the
<code>compile_commands.json</code> file, generated by CMake. When invoking
clang tools, you can either specify a path to a build directory using a command
line parameter <code>-p</code> or let Clang Tooling find this file in your
source tree. In either case you need to configure your build using CMake to use
clang tools.</p>
<!-- ======================================================================= -->
<h2><a name="using-make">Setup Clang Tooling Using CMake and Make</a></h2>
<!-- ======================================================================= -->
<p>If you intend to use make to build LLVM, you should have CMake 2.8.6 or later
installed (can be found <a href="http://cmake.org">here</a>).</p>
<p>First, you need to generate Makefiles for LLVM with CMake. You need to make
a build directory and run CMake from it:</p>
<pre>
mkdir your/build/directory
cd your/build/directory
cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
</pre>
<p>If you want to use clang instead of GCC, you can add
<code>-DCMAKE_C_COMPILER=/path/to/clang
-DCMAKE_CXX_COMPILER=/path/to/clang++</code>.
You can also use ccmake, which provides a curses interface to configure CMake
variables for lazy people.</p>
<p>As a result, the new <code>compile_commands.json</code> file should appear in
the current directory. You should link it to the LLVM source tree so that Clang
Tooling is able to use it:</p>
<pre>
ln -s $PWD/compile_commands.json path/to/llvm/source/
</pre>
<p>Now you are ready to build and test LLVM using make:</p>
<pre>
make check-all
</pre>
<!-- ======================================================================= -->
<h2><a name="using-tools">Using Clang Tools</a></h2>
<!-- ======================================================================= -->
<p>After you completed the previous steps, you are ready to run clang tools. If
you have a recent clang installed, you should have <code>clang-check</code> in
$PATH. Try to run it on any .cpp file inside the LLVM source tree:</p>
<pre>
clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp
</pre>
<p>If you're using vim, it's convenient to have clang-check integrated. Put this
into your .vimrc:</p>
<pre>
function! ClangCheckImpl(cmd)
if &amp;autowrite | wall | endif
echo "Running " . a:cmd . " ..."
let l:output = system(a:cmd)
cexpr l:output
cwindow
let w:quickfix_title = a:cmd
if v:shell_error != 0
cc
endif
let g:clang_check_last_cmd = a:cmd
endfunction
function! ClangCheck()
let l:filename = expand('%')
if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$'
call ClangCheckImpl("clang-check " . l:filename)
elseif exists("g:clang_check_last_cmd")
call ClangCheckImpl(g:clang_check_last_cmd)
else
echo "Can't detect file's compilation arguments and no previous clang-check invocation!"
endif
endfunction
nmap &lt;silent&gt; &lt;F5&gt; :call ClangCheck()&lt;CR&gt;&lt;CR&gt;
</pre>
<p>When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In case
the current file has a different extension (for example, .h), F5 will re-run
the last clang-check invocation made from this vim instance (if any). The
output will go into the error window, which is opened automatically when
clang-check finds errors, and can be re-opened with <code>:cope</code>.</p>
<p>Other <code>clang-check</code> options that can be useful when working with
clang AST:</p>
<ul>
<li><code>-ast-print</code> - Build ASTs and then pretty-print them.</li>
<li><code>-ast-dump</code> - Build ASTs and then debug dump them.</li>
<li><code>-ast-dump-filter=&lt;string&gt;</code> - Use with
<code>-ast-dump</code> or <code>-ast-print</code> to dump/print
only AST declaration nodes having a certain substring in a qualified name.
Use <code>-ast-list</code> to list all filterable declaration node
names.</li>
<li><code>-ast-list</code> - Build ASTs and print the list of declaration
node qualified names.</li>
</ul>
<p>Examples:</p>
<pre>
<b>$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer</b>
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
Dumping &lt;anonymous namespace&gt;::ActionFactory::newASTConsumer:
clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 &lt;/home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3&gt;
(IfStmt 0x44d97c8 &lt;line:65:5, line:66:45&gt;
&lt;&lt;&lt;NULL&gt;&gt;&gt;
(ImplicitCastExpr 0x44d96d0 &lt;line:65:9&gt; '_Bool':'_Bool' &lt;UserDefinedConversion&gt;
...
<b>$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer</b>
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
Printing &lt;anonymous namespace&gt;::ActionFactory::newASTConsumer:
clang::ASTConsumer *newASTConsumer() {
if (this-&gt;ASTList.operator _Bool())
return clang::CreateASTDeclNodeLister();
if (this-&gt;ASTDump.operator _Bool())
return clang::CreateASTDumper(this-&gt;ASTDumpFilter);
if (this-&gt;ASTPrint.operator _Bool())
return clang::CreateASTPrinter(&amp;llvm::outs(), this-&gt;ASTDumpFilter);
return new clang::ASTConsumer();
}
</pre>
<!-- ======================================================================= -->
<h2><a name="using-ninja">(Experimental) Using Ninja Build System</a></h2>
<!-- ======================================================================= -->
<p>Optionally you can use the <a
href="https://github.com/martine/ninja">Ninja</a> build system instead of
make. It is aimed at making your builds faster. Currently this step will require
building Ninja from sources and using a development version of CMake.</p>
<p>To take advantage of using Clang Tools along with Ninja build you need at
least CMake 2.8.9. At the moment CMake 2.8.9 is still under development, so you
can get latest development sources and build it yourself:</p>
<pre>
git clone git://cmake.org/cmake.git
cd cmake
./bootstrap
make
sudo make install
</pre>
<p>Having the correct version of CMake, you can clone the Ninja git repository
and build Ninja from sources:</p>
<pre>
git clone git://github.com/martine/ninja.git
cd ninja/
./bootstrap.py
</pre>
<p>This will result in a single binary <code>ninja</code> in the current
directory. It doesn't require installation and can just be copied to any
location inside <code>$PATH</code>, say <code>/usr/local/bin/</code>:</p>
<pre>
sudo cp ninja /usr/local/bin/
sudo chmod a+rx /usr/local/bin/ninja
</pre>
<p>After doing all of this, you'll need to generate Ninja build files for LLVM
with CMake. You need to make a build directory and run CMake from it:</p>
<pre>
mkdir your/build/directory
cd your/build/directory
cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
</pre>
<p>If you want to use clang instead of GCC, you can add
<code>-DCMAKE_C_COMPILER=/path/to/clang
-DCMAKE_CXX_COMPILER=/path/to/clang++</code>.
You can also use ccmake, which provides a curses interface to configure CMake
variables in an interactive manner.</p>
<p>As a result, the new <code>compile_commands.json</code> file should appear in
the current directory. You should link it to the LLVM source tree so that Clang
Tooling is able to use it:</p>
<pre>
ln -s $PWD/compile_commands.json path/to/llvm/source/
</pre>
<p>Now you are ready to build and test LLVM using Ninja:</p>
<pre>
ninja check-all
</pre>
<p>Other target names can be used in the same way as with make.</p>
</div>
</body>
</html>

View File

@ -0,0 +1,199 @@
===================================
How To Setup Clang Tooling For LLVM
===================================
Clang Tooling provides infrastructure to write tools that need syntactic
and semantic information about a program. This term also relates to a set
of specific tools using this infrastructure (e.g. ``clang-check``). This
document provides information on how to set up and use Clang Tooling for
the LLVM source code.
Introduction
============
Clang Tooling needs a compilation database to figure out specific build
options for each file. Currently it can create a compilation database
from the ``compile_commands.json`` file, generated by CMake. When
invoking clang tools, you can either specify a path to a build directory
using a command line parameter ``-p`` or let Clang Tooling find this
file in your source tree. In either case you need to configure your
build using CMake to use clang tools.
Setup Clang Tooling Using CMake and Make
========================================
If you intend to use make to build LLVM, you should have CMake 2.8.6 or
later installed (can be found `here <http://cmake.org>`_).
First, you need to generate Makefiles for LLVM with CMake. You need to
make a build directory and run CMake from it:
.. code-block:: console
$ mkdir your/build/directory
$ cd your/build/directory
$ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
If you want to use clang instead of GCC, you can add
``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``.
You can also use ``ccmake``, which provides a curses interface to configure
CMake variables in an interactive manner.
As a result, the new ``compile_commands.json`` file should appear in the
current directory. You should link it to the LLVM source tree so that
Clang Tooling is able to use it:
.. code-block:: console
$ ln -s $PWD/compile_commands.json path/to/llvm/source/
Now you are ready to build and test LLVM using make:
.. code-block:: console
$ make check-all
Using Clang Tools
=================
After you have completed the previous steps, you are ready to run clang tools. If
you have a recent clang installed, you should have ``clang-check`` in
``$PATH``. Try to run it on any ``.cpp`` file inside the LLVM source tree:
.. code-block:: console
$ clang-check tools/clang/lib/Tooling/CompilationDatabase.cpp
If you're using vim, it's convenient to have clang-check integrated. Put
this into your ``.vimrc``:
::
function! ClangCheckImpl(cmd)
if &autowrite | wall | endif
echo "Running " . a:cmd . " ..."
let l:output = system(a:cmd)
cexpr l:output
cwindow
let w:quickfix_title = a:cmd
if v:shell_error != 0
cc
endif
let g:clang_check_last_cmd = a:cmd
endfunction
function! ClangCheck()
let l:filename = expand('%')
if l:filename =~ '\.\(cpp\|cxx\|cc\|c\)$'
call ClangCheckImpl("clang-check " . l:filename)
elseif exists("g:clang_check_last_cmd")
call ClangCheckImpl(g:clang_check_last_cmd)
else
echo "Can't detect file's compilation arguments and no previous clang-check invocation!"
endif
endfunction
nmap <silent> <F5> :call ClangCheck()<CR><CR>
When editing a .cpp/.cxx/.cc/.c file, hit F5 to reparse the file. In
case the current file has a different extension (for example, .h), F5
will re-run the last clang-check invocation made from this vim instance
(if any). The output will go into the error window, which is opened
automatically when clang-check finds errors, and can be re-opened with
``:cope``.
Other ``clang-check`` options that can be useful when working with clang
AST:
* ``-ast-print`` --- Build ASTs and then pretty-print them.
* ``-ast-dump`` --- Build ASTs and then debug dump them.
* ``-ast-dump-filter=<string>`` --- Use with ``-ast-dump`` or ``-ast-print`` to
dump/print only AST declaration nodes having a certain substring in a
qualified name. Use ``-ast-list`` to list all filterable declaration node
names.
* ``-ast-list`` --- Build ASTs and print the list of declaration node qualified
names.
Examples:
.. code-block:: console
$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-dump -ast-dump-filter ActionFactory::newASTConsumer
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
Dumping <anonymous namespace>::ActionFactory::newASTConsumer:
clang::ASTConsumer *newASTConsumer() (CompoundStmt 0x44da290 </home/alexfh/local/llvm/tools/clang/tools/clang-check/ClangCheck.cpp:64:40, line:72:3>
(IfStmt 0x44d97c8 <line:65:5, line:66:45>
<<<NULL>>>
(ImplicitCastExpr 0x44d96d0 <line:65:9> '_Bool':'_Bool' <UserDefinedConversion>
...
$ clang-check tools/clang/tools/clang-check/ClangCheck.cpp -ast-print -ast-dump-filter ActionFactory::newASTConsumer
Processing: tools/clang/tools/clang-check/ClangCheck.cpp.
Printing <anonymous namespace>::ActionFactory::newASTConsumer:
clang::ASTConsumer *newASTConsumer() {
if (this->ASTList.operator _Bool())
return clang::CreateASTDeclNodeLister();
if (this->ASTDump.operator _Bool())
return clang::CreateASTDumper(this->ASTDumpFilter);
if (this->ASTPrint.operator _Bool())
return clang::CreateASTPrinter(&llvm::outs(), this->ASTDumpFilter);
return new clang::ASTConsumer();
}
(Experimental) Using Ninja Build System
=======================================
Optionally you can use the `Ninja <https://github.com/martine/ninja>`_
build system instead of make. It is aimed at making your builds faster.
Currently this step will require building Ninja from sources.
To take advantage of using Clang Tools along with Ninja build you need
at least CMake 2.8.9.
Clone the Ninja git repository and build Ninja from sources:
.. code-block:: console
$ git clone git://github.com/martine/ninja.git
$ cd ninja/
$ ./bootstrap.py
This will result in a single binary ``ninja`` in the current directory.
It doesn't require installation and can just be copied to any location
inside ``$PATH``, say ``/usr/local/bin/``:
.. code-block:: console
$ sudo cp ninja /usr/local/bin/
$ sudo chmod a+rx /usr/local/bin/ninja
After doing all of this, you'll need to generate Ninja build files for
LLVM with CMake. You need to make a build directory and run CMake from
it:
.. code-block:: console
$ mkdir your/build/directory
$ cd your/build/directory
$ cmake -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON path/to/llvm/sources
If you want to use clang instead of GCC, you can add
``-DCMAKE_C_COMPILER=/path/to/clang -DCMAKE_CXX_COMPILER=/path/to/clang++``.
You can also use ``ccmake``, which provides a curses interface to configure
CMake variables in an interactive manner.
As a result, the new ``compile_commands.json`` file should appear in the
current directory. You should link it to the LLVM source tree so that
Clang Tooling is able to use it:
.. code-block:: console
$ ln -s $PWD/compile_commands.json path/to/llvm/source/
Now you are ready to build and test LLVM using Ninja:
.. code-block:: console
$ ninja check-all
Other target names can be used in the same way as with make.

File diff suppressed because it is too large Load Diff

1810
docs/InternalsManual.rst Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,139 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Introduction to the Clang AST</title>
<link type="text/css" rel="stylesheet" href="../menu.css" />
<link type="text/css" rel="stylesheet" href="../content.css" />
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Introduction to the Clang AST</h1>
<p>This document gives a gentle introduction to the mysteries of the Clang AST.
It is targeted at developers who either want to contribute to Clang, or use
tools that work based on Clang's AST, like the AST matchers.</p>
<!-- FIXME: Add link once we have an AST matcher document -->
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
<p>Clang's AST is different from ASTs produced by some other compilers in that it closely
resembles both the written C++ code and the C++ standard. For example,
parenthesis expressions and compile time constants are available in an unreduced
form in the AST. This makes Clang's AST a good fit for refactoring tools.</p>
<p>Documentation for all Clang AST nodes is available via the generated
<a href="http://clang.llvm.org/doxygen">Doxygen</a>. The doxygen online
documentation is also indexed by your favorite search engine, which will make
a search for clang and the AST node's class name usually turn up the doxygen
of the class you're looking for (for example, search for: clang ParenExpr).</p>
<!-- ======================================================================= -->
<h2 id="examine">Examining the AST</h2>
<!-- ======================================================================= -->
<p>A good way to familiarize yourself with the Clang AST is to actually look
at it on some simple example code. Clang has builtin AST-dump modes, which
can be enabled with the flags -ast-dump and -ast-dump-xml. Note that -ast-dump-xml
currently only works with debug-builds of clang.</p>
<p>Let's look at a simple example AST:</p>
<pre>
# cat test.cc
int f(int x) {
int result = (x / 42);
return result;
}
# Clang by default is a frontend for many tools; -cc1 tells it to directly
# use the C++ compiler mode. -undef leaves out some internal declarations.
$ clang -cc1 -undef -ast-dump-xml test.cc
... cutting out internal declarations of clang ...
&lt;TranslationUnit ptr="0x4871160">
&lt;Function ptr="0x48a5800" name="f" prototype="true">
&lt;FunctionProtoType ptr="0x4871de0" canonical="0x4871de0">
&lt;BuiltinType ptr="0x4871250" canonical="0x4871250"/>
&lt;parameters>
&lt;BuiltinType ptr="0x4871250" canonical="0x4871250"/>
&lt;/parameters>
&lt;/FunctionProtoType>
&lt;ParmVar ptr="0x4871d80" name="x" initstyle="c">
&lt;BuiltinType ptr="0x4871250" canonical="0x4871250"/>
&lt;/ParmVar>
&lt;Stmt>
(CompoundStmt 0x48a5a38 &lt;t2.cc:1:14, line:4:1>
(DeclStmt 0x48a59c0 &lt;line:2:3, col:24>
0x48a58c0 "int result =
(ParenExpr 0x48a59a0 &lt;col:16, col:23> 'int'
(BinaryOperator 0x48a5978 &lt;col:17, col:21> 'int' '/'
(ImplicitCastExpr 0x48a5960 &lt;col:17> 'int' &lt;LValueToRValue>
(DeclRefExpr 0x48a5918 &lt;col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int'))
(IntegerLiteral 0x48a5940 &lt;col:21> 'int' 42)))")
(ReturnStmt 0x48a5a18 &lt;line:3:3, col:10>
(ImplicitCastExpr 0x48a5a00 &lt;col:10> 'int' &lt;LValueToRValue>
(DeclRefExpr 0x48a59d8 &lt;col:10> 'int' lvalue Var 0x48a58c0 'result' 'int'))))
&lt;/Stmt>
&lt;/Function>
&lt;/TranslationUnit>
</pre>
<p>In general, -ast-dump-xml dumps declarations in an XML-style format and
statements in an S-expression-style format.
The toplevel declaration in a translation unit is always the
<a href="http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html">translation unit declaration</a>.
In this example, our first user written declaration is the
<a href="http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">function declaration</a>
of 'f'. The body of 'f' is a <a href="http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html">compound statement</a>,
whose child nodes are a <a href="http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html">declaration statement</a>
that declares our result variable, and the
<a href="http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html">return statement</a>.</p>
<!-- ======================================================================= -->
<h2 id="context">AST Context</h2>
<!-- ======================================================================= -->
<p>All information about the AST for a translation unit is bundled up in the class
<a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html">ASTContext</a>.
It allows traversal of the whole translation unit starting from
<a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#abd909fb01ef10cfd0244832a67b1dd64">getTranslationUnitDecl</a>,
or to access Clang's <a href="http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#a4f95adb9958e22fbe55212ae6482feb4">table of identifiers</a>
for the parsed translation unit.</p>
<!-- ======================================================================= -->
<h2 id="nodes">AST Nodes</h2>
<!-- ======================================================================= -->
<p>Clang's AST nodes are modeled on a class hierarchy that does not have a common
ancestor. Instead, there are multiple larger hierarchies for basic node types like
<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a> and
<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>. Many
important AST nodes derive from <a href="http://clang.llvm.org/doxygen/classclang_1_1Type.html">Type</a>,
<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>,
<a href="http://clang.llvm.org/doxygen/classclang_1_1DeclContext.html">DeclContext</a> or
<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>,
with some classes deriving from both Decl and DeclContext.</p>
<p>There are also a multitude of nodes in the AST that are not part of a
larger hierarchy, and are only reachable from specific other nodes,
like <a href="http://clang.llvm.org/doxygen/classclang_1_1CXXBaseSpecifier.html">CXXBaseSpecifier</a>.
</p>
<p>Thus, to traverse the full AST, one starts from the <a href="http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html">TranslationUnitDecl</a>
and then recursively traverses everything that can be reached from that node
- this information has to be encoded for each specific node type. This algorithm
is encoded in the <a href="http://clang.llvm.org/doxygen/classclang_1_1RecursiveASTVisitor.html">RecursiveASTVisitor</a>.
See the <a href="http://clang.llvm.org/docs/RAVFrontendAction.html">RecursiveASTVisitor tutorial</a>.</p>
<p>The two most basic nodes in the Clang AST are statements (<a href="http://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>)
and declarations (<a href="http://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>).
Note that expressions (<a href="http://clang.llvm.org/doxygen/classclang_1_1Expr.html">Expr</a>)
are also statements in Clang's AST.</p>
</div>
</body>
</html>

View File

@ -0,0 +1,135 @@
=============================
Introduction to the Clang AST
=============================
This document gives a gentle introduction to the mysteries of the Clang
AST. It is targeted at developers who either want to contribute to
Clang, or use tools that work based on Clang's AST, like the AST
matchers.
Introduction
============
Clang's AST is different from ASTs produced by some other compilers in
that it closely resembles both the written C++ code and the C++
standard. For example, parenthesis expressions and compile time
constants are available in an unreduced form in the AST. This makes
Clang's AST a good fit for refactoring tools.
Documentation for all Clang AST nodes is available via the generated
`Doxygen <http://clang.llvm.org/doxygen>`_. The doxygen online
documentation is also indexed by your favorite search engine, which will
make a search for clang and the AST node's class name usually turn up
the doxygen of the class you're looking for (for example, search for:
clang ParenExpr).
Examining the AST
=================
A good way to familiarize yourself with the Clang AST is to actually look
at it on some simple example code. Clang has builtin AST-dump modes,
which can be enabled with the flags ``-ast-dump`` and ``-ast-dump-xml``. Note
that ``-ast-dump-xml`` currently only works with debug builds of clang.
Let's look at a simple example AST:
::
$ cat test.cc
int f(int x) {
int result = (x / 42);
return result;
}
# Clang by default is a frontend for many tools; -cc1 tells it to directly
# use the C++ compiler mode. -undef leaves out some internal declarations.
$ clang -cc1 -undef -ast-dump-xml test.cc
... cutting out internal declarations of clang ...
<TranslationUnit ptr="0x4871160">
<Function ptr="0x48a5800" name="f" prototype="true">
<FunctionProtoType ptr="0x4871de0" canonical="0x4871de0">
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
<parameters>
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
</parameters>
</FunctionProtoType>
<ParmVar ptr="0x4871d80" name="x" initstyle="c">
<BuiltinType ptr="0x4871250" canonical="0x4871250"/>
</ParmVar>
<Stmt>
(CompoundStmt 0x48a5a38 <t2.cc:1:14, line:4:1>
(DeclStmt 0x48a59c0 <line:2:3, col:24>
0x48a58c0 "int result =
(ParenExpr 0x48a59a0 <col:16, col:23> 'int'
(BinaryOperator 0x48a5978 <col:17, col:21> 'int' '/'
(ImplicitCastExpr 0x48a5960 <col:17> 'int' <LValueToRValue>
(DeclRefExpr 0x48a5918 <col:17> 'int' lvalue ParmVar 0x4871d80 'x' 'int'))
(IntegerLiteral 0x48a5940 <col:21> 'int' 42)))")
(ReturnStmt 0x48a5a18 <line:3:3, col:10>
(ImplicitCastExpr 0x48a5a00 <col:10> 'int' <LValueToRValue>
(DeclRefExpr 0x48a59d8 <col:10> 'int' lvalue Var 0x48a58c0 'result' 'int'))))
</Stmt>
</Function>
</TranslationUnit>
In general, ``-ast-dump-xml`` dumps declarations in an XML-style format and
statements in an S-expression-style format. The toplevel declaration in
a translation unit is always the `translation unit
declaration <http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html>`_.
In this example, our first user written declaration is the `function
declaration <http://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html>`_
of "``f``". The body of "``f``" is a `compound
statement <http://clang.llvm.org/doxygen/classclang_1_1CompoundStmt.html>`_,
whose child nodes are a `declaration
statement <http://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html>`_
that declares our result variable, and the `return
statement <http://clang.llvm.org/doxygen/classclang_1_1ReturnStmt.html>`_.
AST Context
===========
All information about the AST for a translation unit is bundled up in
the class
`ASTContext <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html>`_.
It allows traversal of the whole translation unit starting from
`getTranslationUnitDecl <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#abd909fb01ef10cfd0244832a67b1dd64>`_,
or to access Clang's `table of
identifiers <http://clang.llvm.org/doxygen/classclang_1_1ASTContext.html#a4f95adb9958e22fbe55212ae6482feb4>`_
for the parsed translation unit.
AST Nodes
=========
Clang's AST nodes are modeled on a class hierarchy that does not have a
common ancestor. Instead, there are multiple larger hierarchies for
basic node types like
`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_ and
`Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_. Many
important AST nodes derive from
`Type <http://clang.llvm.org/doxygen/classclang_1_1Type.html>`_,
`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_,
`DeclContext <http://clang.llvm.org/doxygen/classclang_1_1DeclContext.html>`_
or `Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_, with
some classes deriving from both Decl and DeclContext.
There are also a multitude of nodes in the AST that are not part of a
larger hierarchy, and are only reachable from specific other nodes, like
`CXXBaseSpecifier <http://clang.llvm.org/doxygen/classclang_1_1CXXBaseSpecifier.html>`_.
Thus, to traverse the full AST, one starts from the
`TranslationUnitDecl <http://clang.llvm.org/doxygen/classclang_1_1TranslationUnitDecl.html>`_
and then recursively traverses everything that can be reached from that
node - this information has to be encoded for each specific node type.
This algorithm is encoded in the
`RecursiveASTVisitor <http://clang.llvm.org/doxygen/classclang_1_1RecursiveASTVisitor.html>`_.
See the `RecursiveASTVisitor
tutorial <http://clang.llvm.org/docs/RAVFrontendAction.html>`_.
The two most basic nodes in the Clang AST are statements
(`Stmt <http://clang.llvm.org/doxygen/classclang_1_1Stmt.html>`_) and
declarations
(`Decl <http://clang.llvm.org/doxygen/classclang_1_1Decl.html>`_). Note
that expressions
(`Expr <http://clang.llvm.org/doxygen/classclang_1_1Expr.html>`_) are
also statements in Clang's AST.

View File

@ -1,89 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>JSON Compilation Database Format Specification</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>JSON Compilation Database Format Specification</h1>
<p>This document describes a format for specifying how to replay
single compilations independently of the build system.</p>
<h2>Background</h2>
<p>Tools based on the C++ Abstract Syntax Tree need full information about how to
parse a translation unit. Usually this information is implicitly
available in the build system, but running tools as part of
the build system is not necessarily the best solution:
<ul>
<li>Build systems are inherently change driven, so running multiple
tools over the same code base without changing the code does not fit
into the architecture of many build systems.</li>
<li>Figuring out whether things have changed is often an IO bound
process; this makes it hard to build low latency end user tools based
on the build system.</li>
<li>Build systems are inherently sequential in the build graph, for example
due to generated source code. While tools that run independently of the
build still need the generated source code to exist, running tools multiple
times over unchanging source does not require serialization of the runs
according to the build dependency graph.</li>
</ul>
</p>
<h2>Supported Systems</h2>
<p>Currently <a href="http://cmake.org">CMake</a> (since 2.8.5) supports generation of compilation
databases for Unix Makefile builds (Ninja builds in the works) with the option
CMAKE_EXPORT_COMPILE_COMMANDS.</p>
<p>Clang's tooling interface supports reading compilation databases; see
the <a href="LibTooling.html">LibTooling documentation</a>. libclang and its
python bindings also support this (since clang 3.2); see
<a href="/doxygen/group__COMPILATIONDB.html">CXCompilationDatabase.h</a>.</p>
<h2>Format</h2>
<p>A compilation database is a JSON file, which consists of an array of
"command objects", where each command object specifies one way a translation unit
is compiled in the project.</p>
<p>Each command object contains the translation unit's main file, the working
directory of the compile run and the actual compile command.</p>
<p>Example:
<pre>
[
{ "directory": "/home/user/llvm/build",
"command": "/usr/bin/clang++ -Irelative -DSOMEDEF='\"With spaces and quotes.\"' -c -o file.o file.cc",
"file": "file.cc" },
...
]
</pre>
The contracts for each field in the command object are:
<ul>
<li><b>directory:</b> The working directory of the compilation. All paths specified
in the <b>command</b> or <b>file</b> fields must be either absolute or relative to
this directory.</li>
<li><b>file:</b> The main translation unit source processed by this compilation step.
This is used by tools as the key into the compilation database. There can be multiple
command objects for the same file, for example if the same source file is
compiled with different configurations.</li>
<li><b>command:</b> The compile command executed. After JSON unescaping, this must
be a valid command to rerun the exact compilation step for the translation unit in
the environment the build system uses. Parameters use shell quoting and shell escaping
of quotes, with '"' and '\' being the only special characters. Shell expansion is
not supported.</li>
</ul>
</p>
<h2>Build System Integration</h2>
<p>The convention is to name the file compile_commands.json and put it at the top
of the build directory. Clang tools are pointed to the top of the build directory
to detect the file and use the compilation database to parse C++ code in the source
tree.</p>
</div>
</body>
</html>

View File

@ -0,0 +1,88 @@
==============================================
JSON Compilation Database Format Specification
==============================================
This document describes a format for specifying how to replay single
compilations independently of the build system.
Background
==========
Tools based on the C++ Abstract Syntax Tree need full information about how to
parse a translation unit. Usually this information is implicitly
available in the build system, but running tools as part of the build
system is not necessarily the best solution:
- Build systems are inherently change driven, so running multiple tools
over the same code base without changing the code does not fit into
the architecture of many build systems.
- Figuring out whether things have changed is often an IO bound
process; this makes it hard to build low latency end user tools based
on the build system.
- Build systems are inherently sequential in the build graph, for
example due to generated source code. While tools that run
independently of the build still need the generated source code to
exist, running tools multiple times over unchanging source does not
require serialization of the runs according to the build dependency
graph.
Supported Systems
=================
Currently `CMake <http://cmake.org>`_ (since 2.8.5) supports generation
of compilation databases for Unix Makefile builds (Ninja builds in the
works) with the option ``CMAKE_EXPORT_COMPILE_COMMANDS``.
For projects on Linux, there is an alternative to intercept compiler
calls with a tool called `Bear <https://github.com/rizsotto/Bear>`_.
Clang's tooling interface supports reading compilation databases; see
the :doc:`LibTooling documentation <LibTooling>`. libclang and its
python bindings also support this (since clang 3.2); see
`CXCompilationDatabase.h </doxygen/group__COMPILATIONDB.html>`_.
Format
======
A compilation database is a JSON file, which consists of an array of
"command objects", where each command object specifies one way a
translation unit is compiled in the project.
Each command object contains the translation unit's main file, the
working directory of the compile run and the actual compile command.
Example:
::
[
{ "directory": "/home/user/llvm/build",
"command": "/usr/bin/clang++ -Irelative -DSOMEDEF=\"With spaces, quotes and \\-es.\" -c -o file.o file.cc",
"file": "file.cc" },
...
]
The contracts for each field in the command object are:
- **directory:** The working directory of the compilation. All paths
specified in the **command** or **file** fields must be either
absolute or relative to this directory.
- **file:** The main translation unit source processed by this
compilation step. This is used by tools as the key into the
compilation database. There can be multiple command objects for the
same file, for example if the same source file is compiled with
different configurations.
- **command:** The compile command executed. After JSON unescaping,
this must be a valid command to rerun the exact compilation step for
the translation unit in the environment the build system uses.
Parameters use shell quoting and shell escaping of quotes, with '``"``'
and '``\``' being the only special characters. Shell expansion is not
supported.
Build System Integration
========================
The convention is to name the file ``compile_commands.json`` and put it at
the top of the build directory. Clang tools are pointed to the top of
the build directory to detect the file and use the compilation database
to parse C++ code in the source tree.

File diff suppressed because it is too large Load Diff

2000
docs/LanguageExtensions.rst Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,130 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Matching the Clang AST</title>
<link type="text/css" rel="stylesheet" href="../menu.css" />
<link type="text/css" rel="stylesheet" href="../content.css" />
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Matching the Clang AST</h1>
<p>This document explains how to use Clang's LibASTMatchers to match interesting
nodes of the AST and execute code that uses the matched nodes. Combined with
<a href="LibTooling.html">LibTooling</a>, LibASTMatchers helps to write
code-to-code transformation tools or query tools.</p>
<p>We assume basic knowledge about the Clang AST. See the
<a href="IntroductionToTheClangAST.html">Introduction to the Clang AST</a> if
you want to learn more about how the AST is structured.</p>
<!-- FIXME: create tutorial and link to the tutorial -->
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
<p>LibASTMatchers provides a domain specific language to create predicates on Clang's
AST. This DSL is written in and can be used from C++, allowing users to write
a single program to both match AST nodes and access the node's C++ interface
to extract attributes, source locations, or any other information provided on
the AST level.</p>
<p>AST matchers are predicates on nodes in the AST. Matchers are created
by calling creator functions that allow building up a tree of matchers, where
inner matchers are used to make the match more specific.</p>
<p>For example, to create a matcher that matches all class or union declarations
in the AST of a translation unit, you can call
<a href="LibASTMatchersReference.html#recordDecl0Anchor">recordDecl()</a>.
To narrow the match down, for example to find all class or union declarations with the name "Foo",
insert a <a href="LibASTMatchersReference.html#hasName0Anchor">hasName</a>
matcher: the call recordDecl(hasName("Foo")) returns a matcher that matches classes
or unions that are named "Foo", in any namespace. By default, matchers that accept
multiple inner matchers use an implicit <a href="LibASTMatchersReference.html#allOf0Anchor">allOf()</a>.
This allows further narrowing down the match, for example to match all classes
that are derived from "Bar": recordDecl(hasName("Foo"), isDerivedFrom("Bar")).</p>
<!-- ======================================================================= -->
<h2 id="writing">How to create a matcher</h2>
<!-- ======================================================================= -->
<p>With more than a thousand classes in the Clang AST, one can quickly get lost
when trying to figure out how to create a matcher for a specific pattern. This
section will teach you how to use a rigorous step-by-step pattern to build the
matcher you are interested in. Note that there will always be matchers missing
for some part of the AST. See the section about <a href="#writing">how to write
your own AST matchers</a> later in this document.</p>
<p>The precondition to using the matchers is to understand how the AST
for what you want to match looks like. The <a href="IntroductionToTheClangAST.html">Introduction to the Clang AST</a>
teaches you how to dump a translation unit's AST into a human readable format.</p>
<!-- FIXME: Introduce link to ASTMatchersTutorial.html -->
<!-- FIXME: Introduce link to ASTMatchersCookbook.html -->
<p>In general, the strategy to create the right matchers is:</p>
<ol>
<li>Find the outermost class in Clang's AST you want to match.</li>
<li>Look at the <a href="LibASTMatchersReference.html">AST Matcher Reference</a> for matchers that either match the
node you're interested in or narrow down attributes on the node.</li>
<li>Create your outer match expression. Verify that it works as expected.</li>
<li>Examine the matchers for what the next inner node you want to match is.</li>
<li>Repeat until the matcher is finished.</li>
</ol>
<!-- ======================================================================= -->
<h2 id="binding">Binding nodes in match expressions</h2>
<!-- ======================================================================= -->
<p>Matcher expressions allow you to specify which parts of the AST are interesting
for a certain task. Often you will want to then do something with the nodes
that were matched, like building source code transformations.</p>
<p>To that end, matchers that match specific AST nodes (so called node matchers)
are bindable; for example, recordDecl(hasName("MyClass")).bind("id") will bind
the matched recordDecl node to the string "id", to be later retrieved in the
<a href="http://clang.llvm.org/doxygen/classclang_1_1ast__matchers_1_1MatchFinder_1_1MatchCallback.html">match callback</a>.</p>
<!-- FIXME: Introduce link to ASTMatchersTutorial.html -->
<!-- FIXME: Introduce link to ASTMatchersCookbook.html -->
<!-- ======================================================================= -->
<h2 id="writing">Writing your own matchers</h2>
<!-- ======================================================================= -->
<p>There are multiple different ways to define a matcher, depending on its
type and flexibility.</p>
<ul>
<li><b>VariadicDynCastAllOfMatcher&lt;Base, Derived></b><p>Those match all nodes
of type <i>Base</i> if they can be dynamically casted to <i>Derived</i>. The
names of those matchers are nouns, which closely resemble <i>Derived</i>.
VariadicDynCastAllOfMatchers are the backbone of the matcher hierarchy. Most
often, your match expression will start with one of them, and you can
<a href="#binding">bind</a> the node they represent to ids for later processing.</p>
<p>VariadicDynCastAllOfMatchers are callable classes that model variadic
template functions in C++03. They take an arbitrary number of Matcher&lt;Derived>
and return a Matcher&lt;Base>.</p></li>
<li><b>AST_MATCHER_P(Type, Name, ParamType, Param)</b><p> Most matcher definitions
use the matcher creation macros. Those define both the matcher of type Matcher&lt;Type>
itself, and a matcher-creation function named <i>Name</i> that takes a parameter
of type <i>ParamType</i> and returns the corresponding matcher.</p>
<p>There are multiple matcher definition macros that deal with polymorphic return
values and different parameter counts. See <a href="http://clang.llvm.org/doxygen/ASTMatchersMacros_8h.html">ASTMatchersMacros.h</a>.
</p></li>
<li><b>Matcher creation functions</b><p>Matchers are generated by nesting
calls to matcher creation functions. Most of the time those functions are either
created by using VariadicDynCastAllOfMatcher or the matcher creation macros
(see below). The free-standing functions are an indication that this matcher
is just a combination of other matchers, as is for example the case with
<a href="LibASTMatchersReference.html#callee1Anchor">callee</a>.</p></li>
</ul>
</div>
</body>
</html>

134
docs/LibASTMatchers.rst Normal file
View File

@ -0,0 +1,134 @@
======================
Matching the Clang AST
======================
This document explains how to use Clang's LibASTMatchers to match interesting
nodes of the AST and execute code that uses the matched nodes. Combined with
:doc:`LibTooling`, LibASTMatchers helps to write code-to-code transformation
tools or query tools.
We assume basic knowledge about the Clang AST. See the :doc:`Introduction
to the Clang AST <IntroductionToTheClangAST>` if you want to learn more
about how the AST is structured.
.. FIXME: create tutorial and link to the tutorial
Introduction
------------
LibASTMatchers provides a domain specific language to create predicates on
Clang's AST. This DSL is written in and can be used from C++, allowing users
to write a single program to both match AST nodes and access the node's C++
interface to extract attributes, source locations, or any other information
provided on the AST level.
AST matchers are predicates on nodes in the AST. Matchers are created by
calling creator functions that allow building up a tree of matchers, where
inner matchers are used to make the match more specific.
For example, to create a matcher that matches all class or union declarations
in the AST of a translation unit, you can call `recordDecl()
<LibASTMatchersReference.html#recordDecl0Anchor>`_. To narrow the match down,
for example to find all class or union declarations with the name "``Foo``",
insert a `hasName <LibASTMatchersReference.html#hasName0Anchor>`_ matcher: the
call ``recordDecl(hasName("Foo"))`` returns a matcher that matches classes or
unions that are named "``Foo``", in any namespace. By default, matchers that
accept multiple inner matchers use an implicit `allOf()
<LibASTMatchersReference.html#allOf0Anchor>`_. This allows further narrowing
down the match, for example to match all classes that are derived from
"``Bar``": ``recordDecl(hasName("Foo"), isDerivedFrom("Bar"))``.
How to create a matcher
-----------------------
With more than a thousand classes in the Clang AST, one can quickly get lost
when trying to figure out how to create a matcher for a specific pattern. This
section will teach you how to use a rigorous step-by-step pattern to build the
matcher you are interested in. Note that there will always be matchers missing
for some part of the AST. See the section about :ref:`how to write your own
AST matchers <astmatchers-writing>` later in this document.
.. FIXME: why is it linking back to the same section?!
The precondition to using the matchers is to understand how the AST for what you
want to match looks like. The
:doc:`Introduction to the Clang AST <IntroductionToTheClangAST>` teaches you
how to dump a translation unit's AST into a human readable format.
.. FIXME: Introduce link to ASTMatchersTutorial.html
.. FIXME: Introduce link to ASTMatchersCookbook.html
In general, the strategy to create the right matchers is:
#. Find the outermost class in Clang's AST you want to match.
#. Look at the `AST Matcher Reference <LibASTMatchersReference.html>`_ for
matchers that either match the node you're interested in or narrow down
attributes on the node.
#. Create your outer match expression. Verify that it works as expected.
#. Examine the matchers for what the next inner node you want to match is.
#. Repeat until the matcher is finished.
.. _astmatchers-bind:
Binding nodes in match expressions
----------------------------------
Matcher expressions allow you to specify which parts of the AST are interesting
for a certain task. Often you will want to then do something with the nodes
that were matched, like building source code transformations.
To that end, matchers that match specific AST nodes (so called node matchers)
are bindable; for example, ``recordDecl(hasName("MyClass")).bind("id")`` will
bind the matched ``recordDecl`` node to the string "``id``", to be later
retrieved in the `match callback
<http://clang.llvm.org/doxygen/classclang_1_1ast__matchers_1_1MatchFinder_1_1MatchCallback.html>`_.
.. FIXME: Introduce link to ASTMatchersTutorial.html
.. FIXME: Introduce link to ASTMatchersCookbook.html
Writing your own matchers
-------------------------
There are multiple different ways to define a matcher, depending on its type
and flexibility.
``VariadicDynCastAllOfMatcher<Base, Derived>``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Those match all nodes of type *Base* if they can be dynamically casted to
*Derived*. The names of those matchers are nouns, which closely resemble
*Derived*. ``VariadicDynCastAllOfMatchers`` are the backbone of the matcher
hierarchy. Most often, your match expression will start with one of them, and
you can :ref:`bind <astmatchers-bind>` the node they represent to ids for later
processing.
``VariadicDynCastAllOfMatchers`` are callable classes that model variadic
template functions in C++03. They take an arbitrary number of
``Matcher<Derived>`` and return a ``Matcher<Base>``.
``AST_MATCHER_P(Type, Name, ParamType, Param)``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Most matcher definitions use the matcher creation macros. Those define both
the matcher of type ``Matcher<Type>`` itself, and a matcher-creation function
named *Name* that takes a parameter of type *ParamType* and returns the
corresponding matcher.
There are multiple matcher definition macros that deal with polymorphic return
values and different parameter counts. See `ASTMatchersMacros.h
<http://clang.llvm.org/doxygen/ASTMatchersMacros_8h.html>`_.
.. _astmatchers-writing:
Matcher creation functions
^^^^^^^^^^^^^^^^^^^^^^^^^^
Matchers are generated by nesting calls to matcher creation functions. Most of
the time those functions are either created by using
``VariadicDynCastAllOfMatcher`` or the matcher creation macros (see below).
The free-standing functions are an indication that this matcher is just a
combination of other matchers, as is for example the case with `callee
<LibASTMatchersReference.html#callee1Anchor>`_.
.. FIXME: "... macros (see below)" --- there isn't anything below

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,538 @@
===============================================================
Tutorial for building tools using LibTooling and LibASTMatchers
===============================================================
This document is intended to show how to build a useful source-to-source
translation tool based on Clang's `LibTooling <LibTooling.html>`_. It is
explicitly aimed at people who are new to Clang, so all you should need
is a working knowledge of C++ and the command line.
In order to work on the compiler, you need some basic knowledge of the
abstract syntax tree (AST). To this end, the reader is encouraged to
skim the :doc:`Introduction to the Clang
AST <IntroductionToTheClangAST>`.
Step 0: Obtaining Clang
=======================
As Clang is part of the LLVM project, you'll need to download LLVM's
source code first. Both Clang and LLVM are maintained as Subversion
repositories, but we'll be accessing them through the git mirror. For
further information, see the `getting started
guide <http://llvm.org/docs/GettingStarted.html>`_.
.. code-block:: console
mkdir ~/clang-llvm && cd ~/clang-llvm
git clone http://llvm.org/git/llvm.git
cd llvm/tools
git clone http://llvm.org/git/clang.git
Next you need to obtain the CMake build system and Ninja build tool. You
may already have CMake installed, but current binary versions of CMake
aren't built with Ninja support.
.. code-block:: console
cd ~/clang-llvm
git clone https://github.com/martine/ninja.git
cd ninja
git checkout release
./bootstrap.py
sudo cp ninja /usr/bin/
cd ~/clang-llvm
git clone git://cmake.org/stage/cmake.git
cd cmake
git checkout next
./bootstrap
make
sudo make install
Okay. Now we'll build Clang!
.. code-block:: console
cd ~/clang-llvm
mkdir build && cd build
cmake -G Ninja ../llvm -DLLVM_BUILD_TESTS=ON # Enable tests; default is off.
ninja
ninja check # Test LLVM only.
ninja clang-test # Test Clang only.
ninja install
And we're live.
All of the tests should pass, though there is a (very) small chance that
you can catch LLVM and Clang out of sync. Running ``'git svn rebase'``
in both the llvm and clang directories should fix any problems.
Finally, we want to set Clang as its own compiler.
.. code-block:: console
cd ~/clang-llvm/build
ccmake ../llvm
The second command will bring up a GUI for configuring Clang. You need
to set the entry for ``CMAKE_CXX_COMPILER``. Press ``'t'`` to turn on
advanced mode. Scroll down to ``CMAKE_CXX_COMPILER``, and set it to
``/usr/bin/clang++``, or wherever you installed it. Press ``'c'`` to
configure, then ``'g'`` to generate CMake's files.
Finally, run ninja one last time, and you're done.
Step 1: Create a ClangTool
==========================
Now that we have enough background knowledge, it's time to create the
simplest productive ClangTool in existence: a syntax checker. While this
already exists as ``clang-check``, it's important to understand what's
going on.
First, we'll need to create a new directory for our tool and tell CMake
that it exists. As this is not going to be a core clang tool, it will
live in the ``tools/extra`` repository.
.. code-block:: console
cd ~/clang-llvm/llvm/tools/clang
mkdir tools/extra/loop-convert
echo 'add_subdirectory(loop-convert)' >> tools/extra/CMakeLists.txt
vim tools/extra/loop-convert/CMakeLists.txt
CMakeLists.txt should have the following contents:
::
set(LLVM_LINK_COMPONENTS support)
set(LLVM_USED_LIBS clangTooling clangBasic clangAST)
add_clang_executable(loop-convert
LoopConvert.cpp
)
target_link_libraries(loop-convert
clangTooling
clangBasic
clangASTMatchers
)
With that done, Ninja will be able to compile our tool. Let's give it
something to compile! Put the following into
``tools/extra/loop-convert/LoopConvert.cpp``. A detailed explanation of
why the different parts are needed can be found in the `LibTooling
documentation <LibTooling.html>`_.
.. code-block:: c++
// Declares clang::SyntaxOnlyAction.
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
// Declares llvm::cl::extrahelp.
#include "llvm/Support/CommandLine.h"
using namespace clang::tooling;
using namespace llvm;
// CommonOptionsParser declares HelpMessage with a description of the common
// command-line options related to the compilation database and input files.
// It's nice to have this help message in all tools.
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
// A help message for this specific tool can be added afterwards.
static cl::extrahelp MoreHelp("\nMore help text...");
int main(int argc, const char **argv) {
CommonOptionsParser OptionsParser(argc, argv);
ClangTool Tool(OptionsParser.getCompilations(),
OptionsParser.getSourcePathList());
return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>());
}
And that's it! You can compile our new tool by running ninja from the
``build`` directory.
.. code-block:: console
cd ~/clang-llvm/build
ninja
You should now be able to run the syntax checker, which is located in
``~/clang-llvm/build/bin``, on any source file. Try it!
.. code-block:: console
echo "int main() {}" > test.cpp
bin/loop-convert test.cpp --
Note the two dashes after we specify the source file. The additional
options for the compiler are passed after the dashes rather than loading
them from a compilation database - there just aren't any options needed
right now.
Intermezzo: Learn AST matcher basics
====================================
Clang recently introduced the :doc:`ASTMatcher
library <LibASTMatchers>` to provide a simple, powerful, and
concise way to describe specific patterns in the AST. Implemented as a
DSL powered by macros and templates (see
`ASTMatchers.h <../doxygen/ASTMatchers_8h_source.html>`_ if you're
curious), matchers offer the feel of algebraic data types common to
functional programming languages.
For example, suppose you wanted to examine only binary operators. There
is a matcher to do exactly that, conveniently named ``binaryOperator``.
I'll give you one guess what this matcher does:
.. code-block:: c++
binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0))))
Shockingly, it will match against addition expressions whose left hand
side is exactly the literal 0. It will not match against other forms of
0, such as ``'\0'`` or ``NULL``, but it will match against macros that
expand to 0. The matcher will also not match against calls to the
overloaded operator ``'+'``, as there is a separate ``operatorCallExpr``
matcher to handle overloaded operators.
There are AST matchers to match all the different nodes of the AST,
narrowing matchers to only match AST nodes fulfilling specific criteria,
and traversal matchers to get from one kind of AST node to another. For
a complete list of AST matchers, take a look at the `AST Matcher
References <LibASTMatchersReference.html>`_.
All matchers that are nouns describe entities in the AST and can be
bound, so that they can be referred to whenever a match is found. To do
so, simply call the method ``bind`` on these matchers, e.g.:
.. code-block:: c++
varDecl(hasType(isInteger())).bind("intvar")
Step 2: Using AST matchers
==========================
Okay, on to using matchers for real. Let's start by defining a matcher
which will capture all ``for`` statements that define a new variable
initialized to zero. Let's start with matching all ``for`` loops:
.. code-block:: c++
forStmt()
Next, we want to specify that a single variable is declared in the first
portion of the loop, so we can extend the matcher to
.. code-block:: c++
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl()))))
Finally, we can add the condition that the variable is initialized to
zero.
.. code-block:: c++
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
hasInitializer(integerLiteral(equals(0))))))))
It is fairly easy to read and understand the matcher definition ("match
loops whose init portion declares a single variable which is initialized
to the integer literal 0"), but deciding that every piece is necessary
is more difficult. Note that this matcher will not match loops whose
variables are initialized to ``'\0'``, ``0.0``, ``NULL``, or any form of
zero besides the integer 0.
The last step is giving the matcher a name and binding the ``ForStmt``
as we will want to do something with it:
.. code-block:: c++
StatementMatcher LoopMatcher =
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
Once you have defined your matchers, you will need to add a little more
scaffolding in order to run them. Matchers are paired with a
``MatchCallback`` and registered with a ``MatchFinder`` object, then run
from a ``ClangTool``. More code!
Add the following to ``LoopConvert.cpp``:
.. code-block:: c++
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
using namespace clang;
using namespace clang::ast_matchers;
StatementMatcher LoopMatcher =
forStmt(hasLoopInit(declStmt(hasSingleDecl(varDecl(
hasInitializer(integerLiteral(equals(0)))))))).bind("forLoop");
class LoopPrinter : public MatchFinder::MatchCallback {
public :
virtual void run(const MatchFinder::MatchResult &Result) {
if (const ForStmt *FS = Result.Nodes.getNodeAs<clang::ForStmt>("forLoop"))
FS->dump();
}
};
And change ``main()`` to:
.. code-block:: c++
int main(int argc, const char **argv) {
CommonOptionsParser OptionsParser(argc, argv);
ClangTool Tool(OptionsParser.getCompilations(),
OptionsParser.getSourcePathList());
LoopPrinter Printer;
MatchFinder Finder;
Finder.addMatcher(LoopMatcher, &Printer);
return Tool.run(newFrontendActionFactory(&Finder));
}
Now, you should be able to recompile and run the code to discover for
loops. Create a new file with a few examples, and test out our new
handiwork:
.. code-block:: console
cd ~/clang-llvm/build/
ninja loop-convert
vim ~/test-files/simple-loops.cc
bin/loop-convert ~/test-files/simple-loops.cc
Step 3.5: More Complicated Matchers
===================================
Our simple matcher is capable of discovering for loops, but we would
still need to filter out many more ourselves. We can do a good portion
of the remaining work with some cleverly chosen matchers, but first we
need to decide exactly which properties we want to allow.
How can we characterize for loops over arrays which would be eligible
for translation to range-based syntax? Range based loops over arrays of
size ``N`` that:
- start at index ``0``
- iterate consecutively
- end at index ``N-1``
We already check for (1), so all we need to add is a check to the loop's
condition to ensure that the loop's index variable is compared against
``N`` and another check to ensure that the increment step just
increments this same variable. The matcher for (2) is straightforward:
require a pre- or post-increment of the same variable declared in the
init portion.
Unfortunately, such a matcher is impossible to write. Matchers contain
no logic for comparing two arbitrary AST nodes and determining whether
or not they are equal, so the best we can do is matching more than we
would like to allow, and punting extra comparisons to the callback.
In any case, we can start building this sub-matcher. We can require that
the increment step be a unary increment like this:
.. code-block:: c++
hasIncrement(unaryOperator(hasOperatorName("++")))
Specifying what is incremented introduces another quirk of Clang's AST:
Usages of variables are represented as ``DeclRefExpr``'s ("declaration
reference expressions") because they are expressions which refer to
variable declarations. To find a ``unaryOperator`` that refers to a
specific declaration, we can simply add a second condition to it:
.. code-block:: c++
hasIncrement(unaryOperator(
hasOperatorName("++"),
hasUnaryOperand(declRefExpr())))
Furthermore, we can restrict our matcher to only match if the
incremented variable is an integer:
.. code-block:: c++
hasIncrement(unaryOperator(
hasOperatorName("++"),
hasUnaryOperand(declRefExpr(to(varDecl(hasType(isInteger())))))))
And the last step will be to attach an identifier to this variable, so
that we can retrieve it in the callback:
.. code-block:: c++
hasIncrement(unaryOperator(
hasOperatorName("++"),
hasUnaryOperand(declRefExpr(to(
varDecl(hasType(isInteger())).bind("incrementVariable"))))))
We can add this code to the definition of ``LoopMatcher`` and make sure
that our program, outfitted with the new matcher, only prints out loops
that declare a single variable initialized to zero and have an increment
step consisting of a unary increment of some variable.
Now, we just need to add a matcher to check if the condition part of the
``for`` loop compares a variable against the size of the array. There is
only one problem - we don't know which array we're iterating over
without looking at the body of the loop! We are again restricted to
approximating the result we want with matchers, filling in the details
in the callback. So we start with:
.. code-block:: c++
hasCondition(binaryOperator(hasOperatorName("<")))
It makes sense to ensure that the left-hand side is a reference to a
variable, and that the right-hand side has integer type.
.. code-block:: c++
hasCondition(binaryOperator(
hasOperatorName("<"),
hasLHS(declRefExpr(to(varDecl(hasType(isInteger()))))),
hasRHS(expr(hasType(isInteger())))))
Why? Because it doesn't work. Of the three loops provided in
``test-files/simple.cpp``, zero of them have a matching condition. A
quick look at the AST dump of the first for loop, produced by the
previous iteration of loop-convert, shows us the answer:
::
(ForStmt 0x173b240
(DeclStmt 0x173afc8
0x173af50 "int i =
(IntegerLiteral 0x173afa8 'int' 0)")
<<<NULL>>>
(BinaryOperator 0x173b060 '_Bool' '<'
(ImplicitCastExpr 0x173b030 'int'
(DeclRefExpr 0x173afe0 'int' lvalue Var 0x173af50 'i' 'int'))
(ImplicitCastExpr 0x173b048 'int'
(DeclRefExpr 0x173b008 'const int' lvalue Var 0x170fa80 'N' 'const int')))
(UnaryOperator 0x173b0b0 'int' lvalue prefix '++'
(DeclRefExpr 0x173b088 'int' lvalue Var 0x173af50 'i' 'int'))
(CompoundStatement …
We already know that the declaration and increments both match, or this
loop wouldn't have been dumped. The culprit lies in the implicit cast
applied to the first operand (i.e. the LHS) of the less-than operator,
an L-value to R-value conversion applied to the expression referencing
``i``. Thankfully, the matcher library offers a solution to this problem
in the form of ``ignoringParenImpCasts``, which instructs the matcher to
ignore implicit casts and parentheses before continuing to match.
Adjusting the condition operator will restore the desired match.
.. code-block:: c++
hasCondition(binaryOperator(
hasOperatorName("<"),
hasLHS(ignoringParenImpCasts(declRefExpr(
to(varDecl(hasType(isInteger())))))),
hasRHS(expr(hasType(isInteger())))))
After adding binds to the expressions we wished to capture and
extracting the identifier strings into variables, we have array-step-2
completed.
Step 4: Retrieving Matched Nodes
================================
So far, the matcher callback isn't very interesting: it just dumps the
loop's AST. At some point, we will need to make changes to the input
source code. Next, we'll work on using the nodes we bound in the
previous step.
The ``MatchFinder::MatchCallback::run()`` callback takes a
``MatchFinder::MatchResult&`` as its parameter. We're most interested in
its ``Context`` and ``Nodes`` members. Clang uses the ``ASTContext``
class to represent contextual information about the AST, as the name
implies, though the most functionally important detail is that several
operations require an ``ASTContext*`` parameter. More immediately useful
is the set of matched nodes, and how we retrieve them.
Since we bind three variables (identified by ConditionVarName,
InitVarName, and IncrementVarName), we can obtain the matched nodes by
using the ``getNodeAs()`` member function.
In ``LoopActions.cpp``:
.. code-block:: c++
#include "clang/AST/ASTContext.h"
void LoopPrinter::run(const MatchFinder::MatchResult &Result) {
ASTContext *Context = Result.Context;
const ForStmt *FS = Result.Nodes.getStmtAs<ForStmt>(LoopName);
// We do not want to convert header files!
if (!FS || !Context->getSourceManager().isFromMainFile(FS->getForLoc()))
return;
const VarDecl *IncVar = Result.Nodes.getNodeAs<VarDecl>(IncrementVarName);
const VarDecl *CondVar = Result.Nodes.getNodeAs<VarDecl>(ConditionVarName);
const VarDecl *InitVar = Result.Nodes.getNodeAs<VarDecl>(InitVarName);
Now that we have the three variables, represented by their respective
declarations, let's make sure that they're all the same, using a helper
function I call ``areSameVariable()``.
.. code-block:: c++
if (!areSameVariable(IncVar, CondVar) || !areSameVariable(IncVar, InitVar))
return;
llvm::outs() << "Potential array-based loop discovered.\n";
}
If execution reaches the end of ``LoopPrinter::run()``, we know that the
loop shell looks like
.. code-block:: c++
for (int i= 0; i < expr(); ++i) { ... }
For now, we will just print a message explaining that we found a loop.
The next section will deal with recursively traversing the AST to
discover all changes needed.
As a side note, here is the implementation of ``areSameVariable``. Clang
associates a ``VarDecl`` with each variable to represent the variable's
declaration. Since the "canonical" form of each declaration is unique by
address, all we need to do is make sure neither ``ValueDecl`` (base
class of ``VarDecl``) is ``NULL`` and compare the canonical Decls.
.. code-block:: c++
static bool areSameVariable(const ValueDecl *First, const ValueDecl *Second) {
return First && Second &&
First->getCanonicalDecl() == Second->getCanonicalDecl();
}
It's not as trivial to test if two expressions are the same, though
Clang has already done the hard work for us by providing a way to
canonicalize expressions:
.. code-block:: c++
static bool areSameExpr(ASTContext *Context, const Expr *First,
const Expr *Second) {
if (!First || !Second)
return false;
llvm::FoldingSetNodeID FirstID, SecondID;
First->Profile(FirstID, *Context, true);
Second->Profile(SecondID, *Context, true);
return FirstID == SecondID;
}
This code relies on the comparison between two
``llvm::FoldingSetNodeIDs``. As the documentation for
``Stmt::Profile()`` indicates, the ``Profile()`` member function builds
a description of a node in the AST, based on its properties, along with
those of its children. ``FoldingSetNodeID`` then serves as a hash we can
use to compare expressions. We will need ``areSameExpr`` later. Before
you run the new code on the additional loops added to
test-files/simple.cpp, try to figure out which ones will be considered
potentially convertible.

56
docs/LibFormat.rst Normal file
View File

@ -0,0 +1,56 @@
=========
LibFormat
=========
LibFormat is a library that implements automatic source code formatting based
on Clang. This document describes the LibFormat interface and design as well
as some basic style discussions.
If you just want to use `clang-format` as a tool or integrated into an editor,
check out :doc:`ClangFormat`.
Design
------
FIXME: Write up design.
Interface
---------
The core routine of LibFormat is ``reformat()``:
.. code-block:: c++
tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
SourceManager &SourceMgr,
std::vector<CharSourceRange> Ranges);
This reads a token stream out of the lexer ``Lex`` and reformats all the code
ranges in ``Ranges``. The ``FormatStyle`` controls basic decisions made during
formatting. A list of options can be found under :ref:`style-options`.
.. _style-options:
Style Options
-------------
The style options describe specific formatting options that can be used in
order to make `ClangFormat` comply with different style guides. Currently,
two style guides are hard-coded:
.. code-block:: c++
/// \brief Returns a format style complying with the LLVM coding standards:
/// http://llvm.org/docs/CodingStandards.html.
FormatStyle getLLVMStyle();
/// \brief Returns a format style complying with Google's C++ style guide:
/// http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml.
FormatStyle getGoogleStyle();
These options are also exposed in the :doc:`standalone tools <ClangFormat>`
through the `-style` option.
In the future, we plan on making this configurable.

View File

@ -1,212 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>LibTooling</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>LibTooling</h1>
<p>LibTooling is a library to support writing standalone tools based on
Clang. This document will provide a basic walkthrough of how to write
a tool using LibTooling.</p>
<p>For the information on how to setup Clang Tooling for LLVM see
<a href="HowToSetupToolingForLLVM.html">HowToSetupToolingForLLVM.html</a></p>
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
<p>Tools built with LibTooling, like Clang Plugins, run
<code>FrontendActions</code> over code.
<!-- See FIXME for a tutorial on how to write FrontendActions. -->
In this tutorial, we'll demonstrate the different ways of running clang's
<code>SyntaxOnlyAction</code>, which runs a quick syntax check, over a bunch of
code.</p>
<!-- ======================================================================= -->
<h2 id="runoncode">Parsing a code snippet in memory.</h2>
<!-- ======================================================================= -->
<p>If you ever wanted to run a <code>FrontendAction</code> over some sample
code, for example to unit test parts of the Clang AST,
<code>runToolOnCode</code> is what you looked for. Let me give you an example:
<pre>
#include "clang/Tooling/Tooling.h"
TEST(runToolOnCode, CanSyntaxCheckCode) {
// runToolOnCode returns whether the action was correctly run over the
// given code.
EXPECT_TRUE(runToolOnCode(new clang::SyntaxOnlyAction, "class X {};"));
}
</pre>
<!-- ======================================================================= -->
<h2 id="standalonetool">Writing a standalone tool.</h2>
<!-- ======================================================================= -->
<p>Once you unit tested your <code>FrontendAction</code> to the point where it
cannot possibly break, it's time to create a standalone tool. For a standalone
tool to run clang, it first needs to figure out what command line arguments to
use for a specified file. To that end we create a
<code>CompilationDatabase</code>. There are different ways to create a
compilation database, and we need to support all of them depending on
command-line options. There's the <code>CommonOptionsParser</code> class
that takes the responsibility to parse command-line parameters related to
compilation databases and inputs, so that all tools share the implementation.
</p>
<h3 id="parsingcommonoptions">Parsing common tools options.</h3>
<p><code>CompilationDatabase</code> can be read from a build directory or the
command line. Using <code>CommonOptionsParser</code> allows for explicit
specification of a compile command line, specification of build path using the
<code>-p</code> command-line option, and automatic location of the compilation
database using source files paths.
<pre>
#include "clang/Tooling/CommonOptionsParser.h"
using namespace clang::tooling;
int main(int argc, const char **argv) {
// CommonOptionsParser constructor will parse arguments and create a
// CompilationDatabase. In case of error it will terminate the program.
CommonOptionsParser OptionsParser(argc, argv);
// Use OptionsParser.GetCompilations() and OptionsParser.GetSourcePathList()
// to retrieve CompilationDatabase and the list of input file paths.
}
</pre>
</p>
<h3 id="tool">Creating and running a ClangTool.</h3>
<p>Once we have a <code>CompilationDatabase</code>, we can create a
<code>ClangTool</code> and run our <code>FrontendAction</code> over some code.
For example, to run the <code>SyntaxOnlyAction</code> over the files "a.cc" and
"b.cc" one would write:
<pre>
// A clang tool can run over a number of sources in the same process...
std::vector&lt;std::string> Sources;
Sources.push_back("a.cc");
Sources.push_back("b.cc");
// We hand the CompilationDatabase we created and the sources to run over into
// the tool constructor.
ClangTool Tool(OptionsParser.GetCompilations(), Sources);
// The ClangTool needs a new FrontendAction for each translation unit we run
// on. Thus, it takes a FrontendActionFactory as parameter. To create a
// FrontendActionFactory from a given FrontendAction type, we call
// newFrontendActionFactory&lt;clang::SyntaxOnlyAction>().
int result = Tool.run(newFrontendActionFactory&lt;clang::SyntaxOnlyAction>());
</pre>
</p>
<h3 id="main">Putting it together - the first tool.</h3>
<p>Now we combine the two previous steps into our first real tool. This example
tool is also checked into the clang tree at tools/clang-check/ClangCheck.cpp.
<pre>
// Declares clang::SyntaxOnlyAction.
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
// Declares llvm::cl::extrahelp.
#include "llvm/Support/CommandLine.h"
using namespace clang::tooling;
using namespace llvm;
// CommonOptionsParser declares HelpMessage with a description of the common
// command-line options related to the compilation database and input files.
// It's nice to have this help message in all tools.
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
// A help message for this specific tool can be added afterwards.
static cl::extrahelp MoreHelp("\nMore help text...");
int main(int argc, const char **argv) {
CommonOptionsParser OptionsParser(argc, argv);
ClangTool Tool(OptionsParser.GetCompilations(),
OptionsParser.GetSourcePathList());
return Tool.run(newFrontendActionFactory&lt;clang::SyntaxOnlyAction&gt;());
}
</pre>
</p>
<h3 id="running">Running the tool on some code.</h3>
<p>When you check out and build clang, clang-check is already built and
available to you in bin/clang-check inside your build directory.</p>
<p>You can run clang-check on a file in the llvm repository by specifying
all the needed parameters after a "--" separator:
<pre>
$ cd /path/to/source/llvm
$ export BD=/path/to/build/llvm
$ $BD/bin/clang-check tools/clang/tools/clang-check/ClangCheck.cpp -- \
clang++ -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS \
-Itools/clang/include -I$BD/include -Iinclude -Itools/clang/lib/Headers -c
</pre>
</p>
<p>As an alternative, you can also configure cmake to output a compile command
database into its build directory:
<pre>
# Alternatively to calling cmake, use ccmake, toggle to advanced mode and
# set the parameter CMAKE_EXPORT_COMPILE_COMMANDS from the UI.
$ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON .
</pre>
</p>
<p>
This creates a file called compile_commands.json in the build directory. Now
you can run clang-check over files in the project by specifying the build path
as first argument and some source files as further positional arguments:
<pre>
$ cd /path/to/source/llvm
$ export BD=/path/to/build/llvm
$ $BD/bin/clang-check -p $BD tools/clang/tools/clang-check/ClangCheck.cpp
</pre>
</p>
<h3 id="builtin">Builtin includes.</h3>
<p>Clang tools need their builtin headers and search for them the same way clang
does. Thus, the default location to look for builtin headers is in a path
$(dirname /path/to/tool)/../lib/clang/3.2/include relative to the tool
binary. This works out-of-the-box for tools running from llvm's toplevel
binary directory after building clang-headers, or if the tool is running
from the binary directory of a clang install next to the clang binary.</p>
<p>Tips: if your tool fails to find stddef.h or similar headers, call
the tool with -v and look at the search paths it looks through.</p>
<h3 id="linking">Linking.</h3>
<p>Please note that this presents the linking requirements at the time of this
writing. For the most up-to-date information, look at one of the tools'
Makefiles (for example
<a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-check/Makefile?view=markup">clang-check/Makefile</a>).
</p>
<p>To link a binary using the tooling infrastructure, link in the following
libraries:
<ul>
<li>Tooling</li>
<li>Frontend</li>
<li>Driver</li>
<li>Serialization</li>
<li>Parse</li>
<li>Sema</li>
<li>Analysis</li>
<li>Edit</li>
<li>AST</li>
<li>Lex</li>
<li>Basic</li>
</ul>
</p>
</div>
</body>
</html>

192
docs/LibTooling.rst Normal file
View File

@ -0,0 +1,192 @@
==========
LibTooling
==========
LibTooling is a library to support writing standalone tools based on Clang.
This document will provide a basic walkthrough of how to write a tool using
LibTooling.
For information on how to set up Clang Tooling for LLVM, see
:doc:`HowToSetupToolingForLLVM`.
Introduction
------------
Tools built with LibTooling, like Clang Plugins, run ``FrontendActions`` over
code.
.. See FIXME for a tutorial on how to write FrontendActions.
In this tutorial, we'll demonstrate the different ways of running Clang's
``SyntaxOnlyAction``, which runs a quick syntax check, over a bunch of code.
Parsing a code snippet in memory
--------------------------------
If you ever wanted to run a ``FrontendAction`` over some sample code, for
example to unit test parts of the Clang AST, ``runToolOnCode`` is what you
are looking for. Let me give you an example:
.. code-block:: c++
#include "clang/Tooling/Tooling.h"
TEST(runToolOnCode, CanSyntaxCheckCode) {
// runToolOnCode returns whether the action was correctly run over the
// given code.
EXPECT_TRUE(runToolOnCode(new clang::SyntaxOnlyAction, "class X {};"));
}
Writing a standalone tool
-------------------------
Once you have unit tested your ``FrontendAction`` to the point where it cannot
possibly break, it's time to create a standalone tool. For a standalone tool
to run clang, it first needs to figure out what command line arguments to use
for a specified file. To that end we create a ``CompilationDatabase``. There
are different ways to create a compilation database, and we need to support all
of them depending on command-line options. There's the ``CommonOptionsParser``
class that takes the responsibility to parse command-line parameters related to
compilation databases and inputs, so that all tools share the implementation.
Parsing common tools options
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``CompilationDatabase`` can be read from a build directory or the command line.
Using ``CommonOptionsParser`` allows for explicit specification of a compile
command line, specification of build path using the ``-p`` command-line option,
and automatic location of the compilation database using source files paths.
.. code-block:: c++
#include "clang/Tooling/CommonOptionsParser.h"
using namespace clang::tooling;
int main(int argc, const char **argv) {
// CommonOptionsParser constructor will parse arguments and create a
// CompilationDatabase. In case of error it will terminate the program.
CommonOptionsParser OptionsParser(argc, argv);
// Use OptionsParser.getCompilations() and OptionsParser.getSourcePathList()
// to retrieve CompilationDatabase and the list of input file paths.
}
Creating and running a ClangTool
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Once we have a ``CompilationDatabase``, we can create a ``ClangTool`` and run
our ``FrontendAction`` over some code. For example, to run the
``SyntaxOnlyAction`` over the files "a.cc" and "b.cc" one would write:
.. code-block:: c++
// A clang tool can run over a number of sources in the same process...
std::vector<std::string> Sources;
Sources.push_back("a.cc");
Sources.push_back("b.cc");
// We hand the CompilationDatabase we created and the sources to run over into
// the tool constructor.
ClangTool Tool(OptionsParser.getCompilations(), Sources);
// The ClangTool needs a new FrontendAction for each translation unit we run
// on. Thus, it takes a FrontendActionFactory as parameter. To create a
// FrontendActionFactory from a given FrontendAction type, we call
// newFrontendActionFactory<clang::SyntaxOnlyAction>().
int result = Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>());
Putting it together --- the first tool
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Now we combine the two previous steps into our first real tool. This example
tool is also checked into the clang tree at
``tools/clang-check/ClangCheck.cpp``.
.. code-block:: c++
// Declares clang::SyntaxOnlyAction.
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
// Declares llvm::cl::extrahelp.
#include "llvm/Support/CommandLine.h"
using namespace clang::tooling;
using namespace llvm;
// CommonOptionsParser declares HelpMessage with a description of the common
// command-line options related to the compilation database and input files.
// It's nice to have this help message in all tools.
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
// A help message for this specific tool can be added afterwards.
static cl::extrahelp MoreHelp("\nMore help text...");
int main(int argc, const char **argv) {
CommonOptionsParser OptionsParser(argc, argv);
ClangTool Tool(OptionsParser.getCompilations(),
OptionsParser.getSourcePathList());
return Tool.run(newFrontendActionFactory<clang::SyntaxOnlyAction>());
}
Running the tool on some code
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
When you check out and build clang, clang-check is already built and available
to you in bin/clang-check inside your build directory.
You can run clang-check on a file in the llvm repository by specifying all the
needed parameters after a "``--``" separator:
.. code-block:: bash
$ cd /path/to/source/llvm
$ export BD=/path/to/build/llvm
$ $BD/bin/clang-check tools/clang/tools/clang-check/ClangCheck.cpp -- \
clang++ -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS \
-Itools/clang/include -I$BD/include -Iinclude \
-Itools/clang/lib/Headers -c
As an alternative, you can also configure cmake to output a compile command
database into its build directory:
.. code-block:: bash
# Alternatively to calling cmake, use ccmake, toggle to advanced mode and
# set the parameter CMAKE_EXPORT_COMPILE_COMMANDS from the UI.
$ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON .
This creates a file called ``compile_commands.json`` in the build directory.
Now you can run :program:`clang-check` over files in the project by specifying
the build path as first argument and some source files as further positional
arguments:
.. code-block:: bash
$ cd /path/to/source/llvm
$ export BD=/path/to/build/llvm
$ $BD/bin/clang-check -p $BD tools/clang/tools/clang-check/ClangCheck.cpp
.. _libtooling_builtin_includes:
Builtin includes
^^^^^^^^^^^^^^^^
Clang tools need their builtin headers and search for them the same way Clang
does. Thus, the default location to look for builtin headers is in a path
``$(dirname /path/to/tool)/../lib/clang/3.3/include`` relative to the tool
binary. This works out-of-the-box for tools running from llvm's toplevel
binary directory after building clang-headers, or if the tool is running from
the binary directory of a clang install next to the clang binary.
Tips: if your tool fails to find ``stddef.h`` or similar headers, call the tool
with ``-v`` and look at the search paths it looks through.
Linking
^^^^^^^
For a list of libraries to link, look at one of the tools' Makefiles (for
example `clang-check/Makefile
<http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-check/Makefile?view=markup>`_).

163
docs/Makefile.sphinx Normal file
View File

@ -0,0 +1,163 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
# SPHINXOPTS  - extra options appended to every $(SPHINXBUILD) invocation.
# SPHINXBUILD - the command used to run Sphinx.
# PAPER       - set to `a4` or `letter` to select the matching PAPEROPT_*
#               value below; when left empty no paper-size option is passed.
# BUILDDIR    - root directory under which each target writes its output.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# Internal variables.
# $(PAPEROPT_$(PAPER)) is a computed variable name: it expands to PAPEROPT_a4
# or PAPEROPT_letter depending on PAPER, and to nothing when PAPER is empty.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
# Options shared by most builders. The trailing `.` is the source directory
# argument; each recipe appends its own output directory after it.
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# Every target in this file names an action rather than a file it produces, so
# all of them are declared phony. This includes `texinfo` and `info`: without
# the declaration, a stray file or directory with one of those names in docs/
# would make the target look up to date and silently skip the build.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext default
default: html
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
-rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@# FIXME: Remove this `cp` once HTML->Sphinx transition is completed.
@# Kind of a hack, but HTML-formatted docs are on the way out anyway.
@echo "Copying legacy HTML-formatted docs into $(BUILDDIR)/html"
@cp -a *.html $(BUILDDIR)/html
@# FIXME: What we really need is a way to specify redirects, so that
@# we can just redirect to a reST'ified version of this document.
@# PR14714 is tracking the issue of redirects.
@cp -a Block-ABI-Apple.txt $(BUILDDIR)/html
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Clang.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Clang.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/Clang"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Clang"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
# Build the Texinfo sources, then convert them to Info files with makeinfo.
# The sub-make is invoked through $(MAKE), as the `latexpdf` target does, so
# that the same make program is used and command-line flags such as -j and -n
# (and the jobserver) propagate to the child make.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."

178
docs/MemorySanitizer.rst Normal file
View File

@ -0,0 +1,178 @@
================
MemorySanitizer
================
.. contents::
:local:
Introduction
============
MemorySanitizer is a detector of uninitialized reads. It consists of a
compiler instrumentation module and a run-time library.
Typical slowdown introduced by MemorySanitizer is **3x**.
How to build
============
Follow the `clang build instructions <../get_started.html>`_. CMake
build is supported.
Usage
=====
Simply compile and link your program with ``-fsanitize=memory`` flag.
The MemorySanitizer run-time library should be linked to the final
executable, so make sure to use ``clang`` (not ``ld``) for the final
link step. When linking shared libraries, the MemorySanitizer run-time
is not linked, so ``-Wl,-z,defs`` may cause link errors (don't use it
with MemorySanitizer). To get a reasonable performance add ``-O1`` or
higher. To get meaningful stack traces in error messages add
``-fno-omit-frame-pointer``. To get perfect stack traces you may need
to disable inlining (just use ``-O1``) and tail call elimination
(``-fno-optimize-sibling-calls``).
.. code-block:: console
% cat umr.cc
#include <stdio.h>
int main(int argc, char** argv) {
int* a = new int[10];
a[5] = 0;
if (a[argc])
printf("xx\n");
return 0;
}
% clang -fsanitize=memory -fPIE -pie -fno-omit-frame-pointer -g -O2 umr.cc
If a bug is detected, the program will print an error message to
stderr and exit with a non-zero exit code. Currently, MemorySanitizer
does not symbolize its output by default, so you may need to use a
separate script to symbolize the result offline (this will be fixed in
the future).
.. code-block:: console
% ./a.out 2>log
% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
==30106== WARNING: MemorySanitizer: UMR (uninitialized-memory-read)
#0 0x7f45944b418a in main umr.cc:6
#1 0x7f45938b676c in __libc_start_main libc-start.c:226
Exiting
By default, MemorySanitizer exits on the first detected error.
``__has_feature(memory_sanitizer)``
------------------------------------
In some cases one may need to execute different code depending on
whether MemorySanitizer is enabled. :ref:`\_\_has\_feature
<langext-__has_feature-__has_extension>` can be used for this purpose.
.. code-block:: c
#if defined(__has_feature)
# if __has_feature(memory_sanitizer)
// code that builds only under MemorySanitizer
# endif
#endif
``__attribute__((no_sanitize_memory))``
-----------------------------------------------
Some code should not be checked by MemorySanitizer.
One may use the function attribute
:ref:`no_sanitize_memory <langext-memory_sanitizer>`
to disable uninitialized checks in a particular function.
MemorySanitizer may still instrument such functions to avoid false positives.
This attribute may not be
supported by other compilers, so we suggest using it together with
``__has_feature(memory_sanitizer)``. Note: currently, this attribute will be
lost if the function is inlined.
Origin Tracking
===============
MemorySanitizer can track origins of uninitialized values, similar to
Valgrind's ``--track-origins`` option. This feature is enabled by the
``-fsanitize-memory-track-origins`` Clang option. With the code from
the example above,
.. code-block:: console
% clang -fsanitize=memory -fsanitize-memory-track-origins -fPIE -pie -fno-omit-frame-pointer -g -O2 umr.cc
% ./a.out 2>log
% projects/compiler-rt/lib/asan/scripts/asan_symbolize.py / < log | c++filt
==14425== WARNING: MemorySanitizer: UMR (uninitialized-memory-read)
==14425== WARNING: Trying to symbolize code, but external symbolizer is not initialized!
#0 0x7f8bdda3824b in main umr.cc:6
#1 0x7f8bdce3a76c in __libc_start_main libc-start.c:226
raw origin id: 2030043137
ORIGIN: heap allocation:
#0 0x7f8bdda4034b in operator new[](unsigned long) msan_new_delete.cc:39
#1 0x7f8bdda3814d in main umr.cc:4
#2 0x7f8bdce3a76c in __libc_start_main libc-start.c:226
Exiting
Origin tracking has proved to be very useful for debugging UMR
reports. It slows down program execution by a factor of 1.5x-2x on top
of the usual MemorySanitizer slowdown.
Handling external code
============================
MemorySanitizer requires that all program code is instrumented. This
also includes any libraries that the program depends on, even libc.
Failing to achieve this may result in false UMR reports.
Full MemorySanitizer instrumentation is very difficult to achieve. To
make it easier, MemorySanitizer runtime library includes 70+
interceptors for the most common libc functions. They make it possible
to run MemorySanitizer-instrumented programs linked with
uninstrumented libc. For example, the authors were able to bootstrap
MemorySanitizer-instrumented Clang compiler by linking it with
self-built instrumented libcxx (as a replacement for libstdc++).
In the case when rebuilding all program dependencies with
MemorySanitizer is problematic, an experimental MSanDR tool can be
used. It is a DynamoRio-based tool that uses dynamic instrumentation
to avoid false positives due to uninstrumented code. The tool simply
marks memory from instrumented libraries as fully initialized. See
http://code.google.com/p/memory-sanitizer/wiki/Running#Running_with_the_dynamic_tool
for more information.
Supported Platforms
===================
MemorySanitizer is supported on
* Linux x86\_64 (tested on Ubuntu 10.04 and 12.04);
Limitations
===========
* MemorySanitizer uses 2x more real memory than a native run, 3x with
origin tracking.
* MemorySanitizer maps (but does not reserve) 64 Terabytes of virtual
address space. This means that tools like ``ulimit`` may not work as
usually expected.
* Static linking is not supported.
* Non-position-independent executables are not supported.
* Depending on the version of the Linux kernel, running without ASLR may
  not be supported. Note that GDB disables ASLR by default. To debug
  instrumented programs, use ``set disable-randomization off``.
Current Status
==============
MemorySanitizer is an experimental tool. It is known to work on large
real-world programs, like Clang/LLVM itself.
More Information
================
`http://code.google.com/p/memory-sanitizer <http://code.google.com/p/memory-sanitizer/>`_

713
docs/Modules.rst Normal file
View File

@ -0,0 +1,713 @@
=======
Modules
=======
.. contents::
:local:
.. warning::
The functionality described on this page is still experimental! Please
try it out and send us bug reports!
Introduction
============
Most software is built using a number of software libraries, including libraries supplied by the platform, internal libraries built as part of the software itself to provide structure, and third-party libraries. For each library, one needs to access both its interface (API) and its implementation. In the C family of languages, the interface to a library is accessed by including the appropriate header file(s):
.. code-block:: c
#include <SomeLib.h>
The implementation is handled separately by linking against the appropriate library. For example, by passing ``-lSomeLib`` to the linker.
Modules provide an alternative, simpler way to use software libraries that provides better compile-time scalability and eliminates many of the problems inherent to using the C preprocessor to access the API of a library.
Problems with the current model
-------------------------------
The ``#include`` mechanism provided by the C preprocessor is a very poor way to access the API of a library, for a number of reasons:
* **Compile-time scalability**: Each time a header is included, the
compiler must preprocess and parse the text in that header and every
header it includes, transitively. This process must be repeated for
every translation unit in the application, which involves a huge
amount of redundant work. In a project with *N* translation units
and *M* headers included in each translation unit, the compiler is
performing *M x N* work even though most of the *M* headers are
shared among multiple translation units. C++ is particularly bad,
because the compilation model for templates forces a huge amount of
code into headers.
* **Fragility**: ``#include`` directives are treated as textual
inclusion by the preprocessor, and are therefore subject to any
active macro definitions at the time of inclusion. If any of the
active macro definitions happens to collide with a name in the
library, it can break the library API or cause compilation failures
in the library header itself. For an extreme example,
``#define std "The C++ Standard"`` and then include a standard
library header: the result is a horrific cascade of failures in the
C++ Standard Library's implementation. More subtle real-world
problems occur when the headers for two different libraries interact
due to macro collisions, and users are forced to reorder
``#include`` directives or introduce ``#undef`` directives to break
the (unintended) dependency.
* **Conventional workarounds**: C programmers have
adopted a number of conventions to work around the fragility of the
C preprocessor model. Include guards, for example, are required for
the vast majority of headers to ensure that multiple inclusion
doesn't break the compile. Macro names are written with
``LONG_PREFIXED_UPPERCASE_IDENTIFIERS`` to avoid collisions, and some
library/framework developers even use ``__underscored`` names
in headers to avoid collisions with "normal" names that (by
convention) shouldn't even be macros. These conventions are a
barrier to entry for developers coming from non-C languages, are
boilerplate for more experienced developers, and make our headers
far uglier than they should be.
* **Tool confusion**: In a C-based language, it is hard to build tools
that work well with software libraries, because the boundaries of
the libraries are not clear. Which headers belong to a particular
library, and in what order should those headers be included to
guarantee that they compile correctly? Are the headers C, C++,
Objective-C++, or one of the variants of these languages? What
declarations in those headers are actually meant to be part of the
API, and what declarations are present only because they had to be
written as part of the header file?
Semantic import
---------------
Modules improve access to the API of software libraries by replacing the textual preprocessor inclusion model with a more robust, more efficient semantic model. From the user's perspective, the code looks only slightly different, because one uses an ``import`` declaration rather than a ``#include`` preprocessor directive:
.. code-block:: c
import std.io; // pseudo-code; see below for syntax discussion
However, this module import behaves quite differently from the corresponding ``#include <stdio.h>``: when the compiler sees the module import above, it loads a binary representation of the ``std.io`` module and makes its API available to the application directly. Preprocessor definitions that precede the import declaration have no impact on the API provided by ``std.io``, because the module itself was compiled as a separate, standalone module. Additionally, any linker flags required to use the ``std.io`` module will automatically be provided when the module is imported [#]_.
This semantic import model addresses many of the problems of the preprocessor inclusion model:
* **Compile-time scalability**: The ``std.io`` module is only compiled once, and importing the module into a translation unit is a constant-time operation (independent of module system). Thus, the API of each software library is only parsed once, reducing the *M x N* compilation problem to an *M + N* problem.
* **Fragility**: Each module is parsed as a standalone entity, so it has a consistent preprocessor environment. This completely eliminates the need for ``__underscored`` names and similarly defensive tricks. Moreover, the current preprocessor definitions when an import declaration is encountered are ignored, so one software library can not affect how another software library is compiled, eliminating include-order dependencies.
* **Tool confusion**: Modules describe the API of software libraries, and tools can reason about and present a module as a representation of that API. Because modules can only be built standalone, tools can rely on the module definition to ensure that they get the complete API for the library. Moreover, modules can specify which languages they work with, so, e.g., one can not accidentally attempt to load a C++ module into a C program.
Problems modules do not solve
-----------------------------
Many programming languages have a module or package system, and because of the variety of features provided by these languages it is important to define what modules do *not* do. In particular, all of the following are considered out-of-scope for modules:
* **Rewrite the world's code**: It is not realistic to require applications or software libraries to make drastic or non-backward-compatible changes, nor is it feasible to completely eliminate headers. Modules must interoperate with existing software libraries and allow a gradual transition.
* **Versioning**: Modules have no notion of version information. Programmers must still rely on the existing versioning mechanisms of the underlying language (if any exist) to version software libraries.
* **Namespaces**: Unlike in some languages, modules do not imply any notion of namespaces. Thus, a struct declared in one module will still conflict with a struct of the same name declared in a different module, just as they would if declared in two different headers. This aspect is important for backward compatibility, because (for example) the mangled names of entities in software libraries must not change when introducing modules.
* **Binary distribution of modules**: Headers (particularly C++ headers) expose the full complexity of the language. Maintaining a stable binary module format across architectures, compiler versions, and compiler vendors is technically infeasible.
Using Modules
=============
To enable modules, pass the command-line flag ``-fmodules`` [#]_. This will make any modules-enabled software libraries available as modules as well as introducing any modules-specific syntax. Additional `command-line parameters`_ are described in a separate section later.
Import declaration
------------------
The most direct way to import a module is with an *import declaration*, which imports the named module:
.. parsed-literal::
import std;
The import declaration above imports the entire contents of the ``std`` module (which would contain, e.g., the entire C or C++ standard library) and makes its API available within the current translation unit. To import only part of a module, one may use dot syntax to specify a particular submodule, e.g.,
.. parsed-literal::
import std.io;
Redundant import declarations are ignored, and one is free to import modules at any point within the translation unit, so long as the import declaration is at global scope.
.. warning::
The import declaration syntax described here does not actually exist. Rather, it is a straw man proposal that may very well change when modules are discussed in the C and C++ committees. See the section `Includes as imports`_ to see how modules get imported today.
Includes as imports
-------------------
The primary user-level feature of modules is the import operation, which provides access to the API of software libraries. However, today's programs make extensive use of ``#include``, and it is unrealistic to assume that all of this code will change overnight. Instead, modules automatically translate ``#include`` directives into the corresponding module import. For example, the include directive
.. code-block:: c
#include <stdio.h>
will be automatically mapped to an import of the module ``std.io``. Even with specific ``import`` syntax in the language, this particular feature is important for both adoption and backward compatibility: automatic translation of ``#include`` to ``import`` allows an application to get the benefits of modules (for all modules-enabled libraries) without any changes to the application itself. Thus, users can easily use modules with one compiler while falling back to the preprocessor-inclusion mechanism with other compilers.
.. note::
The automatic mapping of ``#include`` to ``import`` also solves an implementation problem: importing a module with a definition of some entity (say, a ``struct Point``) and then parsing a header containing another definition of ``struct Point`` would cause a redefinition error, even if it is the same ``struct Point``. By mapping ``#include`` to ``import``, the compiler can guarantee that it always sees just the already-parsed definition from the module.
Module maps
-----------
The crucial link between modules and headers is described by a *module map*, which describes how a collection of existing headers maps on to the (logical) structure of a module. For example, one could imagine a module ``std`` covering the C standard library. Each of the C standard library headers (``<stdio.h>``, ``<stdlib.h>``, ``<math.h>``, etc.) would contribute to the ``std`` module, by placing their respective APIs into the corresponding submodule (``std.io``, ``std.lib``, ``std.math``, etc.). Having a list of the headers that are part of the ``std`` module allows the compiler to build the ``std`` module as a standalone entity, and having the mapping from header names to (sub)modules allows the automatic translation of ``#include`` directives to module imports.
Module maps are specified as separate files (each named ``module.map``) alongside the headers they describe, which allows them to be added to existing software libraries without having to change the library headers themselves (in most cases [#]_). The actual `Module map language`_ is described in a later section.
.. note::
To actually see any benefits from modules, one first has to introduce module maps for the underlying C standard library and the libraries and headers on which it depends. The section `Modularizing a Platform`_ describes the steps one must take to write these module maps.
Compilation model
-----------------
The binary representation of modules is automatically generated by the compiler on an as-needed basis. When a module is imported (e.g., by an ``#include`` of one of the module's headers), the compiler will spawn a second instance of itself [#]_, with a fresh preprocessing context [#]_, to parse just the headers in that module. The resulting Abstract Syntax Tree (AST) is then persisted into the binary representation of the module that is then loaded into the translation unit where the module import was encountered.
The binary representation of modules is persisted in the *module cache*. Imports of a module will first query the module cache and, if a binary representation of the required module is already available, will load that representation directly. Thus, a module's headers will only be parsed once per language configuration, rather than once per translation unit that uses the module.
Modules maintain references to each of the headers that were part of the module build. If any of those headers changes, or if any of the modules on which a module depends change, then the module will be (automatically) recompiled. The process should never require any user intervention.
Command-line parameters
-----------------------
``-fmodules``
Enable the modules feature (EXPERIMENTAL).
``-fcxx-modules``
Enable the modules feature for C++ (EXPERIMENTAL and VERY BROKEN).
``-fmodules-cache-path=<directory>``
Specify the path to the modules cache. If not provided, Clang will select a system-appropriate default.
``-f[no-]modules-autolink``
Enable or disable automatic linking against the libraries associated with imported modules.
``-fmodules-ignore-macro=macroname``
Instruct modules to ignore the named macro when selecting an appropriate module variant. Use this for macros defined on the command line that don't affect how modules are built, to improve sharing of compiled module files.
``-fmodules-prune-interval=seconds``
Specify the minimum delay (in seconds) between attempts to prune the module cache. Module cache pruning attempts to clear out old, unused module files so that the module cache itself does not grow without bound. The default delay is large (604,800 seconds, or 7 days) because this is an expensive operation. Set this value to 0 to turn off pruning.
``-fmodules-prune-after=seconds``
Specify the minimum time (in seconds) for which a file in the module cache must be unused (according to access time) before module pruning will remove it. The default delay is large (2,678,400 seconds, or 31 days) to avoid excessive module rebuilding.
``-module-file-info <module file name>``
Debugging aid that prints information about a given module file (with a ``.pcm`` extension), including the language and preprocessor options that particular module variant was built with.
Module Map Language
===================
The module map language describes the mapping from header files to the
logical structure of modules. To enable support for using a library as
a module, one must write a ``module.map`` file for that library. The
``module.map`` file is placed alongside the header files themselves,
and is written in the module map language described below.
As an example, the module map file for the C standard library might look a bit like this:
.. parsed-literal::
module std [system] {
module complex {
header "complex.h"
export *
}
module ctype {
header "ctype.h"
export *
}
module errno {
header "errno.h"
header "sys/errno.h"
export *
}
module fenv {
header "fenv.h"
export *
}
// ...more headers follow...
}
Here, the top-level module ``std`` encompasses the whole C standard library. It has a number of submodules containing different parts of the standard library: ``complex`` for complex numbers, ``ctype`` for character types, etc. Each submodule lists one or more headers that provide the contents for that submodule. Finally, the ``export *`` command specifies that anything included by that submodule will be automatically re-exported.
Lexical structure
-----------------
Module map files use a simplified form of the C99 lexer, with the same rules for identifiers, tokens, string literals, ``/* */`` and ``//`` comments. The module map language has the following reserved words; all other C identifiers are valid identifiers.
.. parsed-literal::
``config_macros`` ``export`` ``module``
``conflict`` ``framework`` ``requires``
``exclude`` ``header`` ``umbrella``
``explicit`` ``link``
Module map file
---------------
A module map file consists of a series of module declarations:
.. parsed-literal::
*module-map-file*:
*module-declaration**
Within a module map file, modules are referred to by a *module-id*, which uses periods to separate each part of a module's name:
.. parsed-literal::
*module-id*:
*identifier* ('.' *identifier*)*
Module declaration
------------------
A module declaration describes a module, including the headers that contribute to that module, its submodules, and other aspects of the module.
.. parsed-literal::
*module-declaration*:
``explicit``:sub:`opt` ``framework``:sub:`opt` ``module`` *module-id* *attributes*:sub:`opt` '{' *module-member** '}'
The *module-id* should consist of only a single *identifier*, which provides the name of the module being defined. Each module shall have a single definition.
The ``explicit`` qualifier can only be applied to a submodule, i.e., a module that is nested within another module. The contents of explicit submodules are only made available when the submodule itself was explicitly named in an import declaration or was re-exported from an imported module.
The ``framework`` qualifier specifies that this module corresponds to a Darwin-style framework. A Darwin-style framework (used primarily on Mac OS X and iOS) is contained entirely in directory ``Name.framework``, where ``Name`` is the name of the framework (and, therefore, the name of the module). That directory has the following layout:
.. parsed-literal::
Name.framework/
module.map Module map for the framework
Headers/ Subdirectory containing framework headers
Frameworks/ Subdirectory containing embedded frameworks
Resources/ Subdirectory containing additional resources
Name Symbolic link to the shared library for the framework
The ``system`` attribute specifies that the module is a system module. When a system module is rebuilt, all of the module's headers will be considered system headers, which suppresses warnings. This is equivalent to placing ``#pragma GCC system_header`` in each of the module's headers. The form of attributes is described in the section Attributes_, below.
Modules can have a number of different kinds of members, each of which is described below:
.. parsed-literal::
*module-member*:
*requires-declaration*
*header-declaration*
*umbrella-dir-declaration*
*submodule-declaration*
*export-declaration*
*link-declaration*
*config-macros-declaration*
*conflict-declaration*
Requires declaration
~~~~~~~~~~~~~~~~~~~~
A *requires-declaration* specifies the requirements that an importing translation unit must satisfy to use the module.
.. parsed-literal::
*requires-declaration*:
``requires`` *feature-list*
*feature-list*:
*identifier* (',' *identifier*)*
The requirements clause allows specific modules or submodules to specify that they are only accessible with certain language dialects or on certain platforms. The feature list is a set of identifiers, defined below. If any of the features is not available in a given translation unit, that translation unit shall not import the module.
The following features are defined:
altivec
The target supports AltiVec.
blocks
The "blocks" language feature is available.
cplusplus
C++ support is available.
cplusplus11
C++11 support is available.
objc
Objective-C support is available.
objc_arc
Objective-C Automatic Reference Counting (ARC) is available.
opencl
OpenCL is available.
tls
Thread local storage is available.
*target feature*
A specific target feature (e.g., ``sse4``, ``avx``, ``neon``) is available.
**Example**: The ``std`` module can be extended to also include C++ and C++11 headers using a *requires-declaration*:
.. parsed-literal::
module std {
// C standard library...
module vector {
requires cplusplus
header "vector"
}
module type_traits {
requires cplusplus11
header "type_traits"
}
}
Header declaration
~~~~~~~~~~~~~~~~~~
A header declaration specifies that a particular header is associated with the enclosing module.
.. parsed-literal::
*header-declaration*:
``umbrella``:sub:`opt` ``header`` *string-literal*
``exclude`` ``header`` *string-literal*
A header declaration that does not contain ``exclude`` specifies a header that contributes to the enclosing module. Specifically, when the module is built, the named header will be parsed and its declarations will be (logically) placed into the enclosing submodule.
A header with the ``umbrella`` specifier is called an umbrella header. An umbrella header includes all of the headers within its directory (and any subdirectories), and is typically used (in the ``#include`` world) to easily access the full API provided by a particular library. With modules, an umbrella header is a convenient shortcut that eliminates the need to write out ``header`` declarations for every library header. A given directory can only contain a single umbrella header.
.. note::
Any headers not included by the umbrella header should have
explicit ``header`` declarations. Use the
``-Wincomplete-umbrella`` warning option to ask Clang to complain
about headers not covered by the umbrella header or the module map.
A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module.
**Example**: The C header ``assert.h`` is an excellent candidate for an excluded header, because it is meant to be included multiple times (possibly with different ``NDEBUG`` settings).
.. parsed-literal::
module std [system] {
exclude header "assert.h"
}
A given header shall not be referenced by more than one *header-declaration*.
Umbrella directory declaration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An umbrella directory declaration specifies that all of the headers in the specified directory should be included within the module.
.. parsed-literal::
*umbrella-dir-declaration*:
``umbrella`` *string-literal*
The *string-literal* refers to a directory. When the module is built, all of the header files in that directory (and its subdirectories) are included in the module.
An *umbrella-dir-declaration* shall not refer to the same directory as the location of an umbrella *header-declaration*. In other words, only a single kind of umbrella can be specified for a given directory.
.. note::
Umbrella directories are useful for libraries that have a large number of headers but do not have an umbrella header.
Submodule declaration
~~~~~~~~~~~~~~~~~~~~~
Submodule declarations describe modules that are nested within their enclosing module.
.. parsed-literal::
*submodule-declaration*:
*module-declaration*
*inferred-submodule-declaration*
A *submodule-declaration* that is a *module-declaration* is a nested module. If the *module-declaration* has a ``framework`` specifier, the enclosing module shall have a ``framework`` specifier; the submodule's contents shall be contained within the subdirectory ``Frameworks/SubName.framework``, where ``SubName`` is the name of the submodule.
A *submodule-declaration* that is an *inferred-submodule-declaration* describes a set of submodules that correspond to any headers that are part of the module but are not explicitly described by a *header-declaration*.
.. parsed-literal::
*inferred-submodule-declaration*:
``explicit``:sub:`opt` ``framework``:sub:`opt` ``module`` '*' *attributes*:sub:`opt` '{' *inferred-submodule-member** '}'
*inferred-submodule-member*:
``export`` '*'
A module containing an *inferred-submodule-declaration* shall have either an umbrella header or an umbrella directory. The headers to which the *inferred-submodule-declaration* applies are exactly those headers included by the umbrella header (transitively) or included in the module because they reside within the umbrella directory (or its subdirectories).
For each header included by the umbrella header or in the umbrella directory that is not named by a *header-declaration*, a module declaration is implicitly generated from the *inferred-submodule-declaration*. The module will:
* Have the same name as the header (without the file extension)
* Have the ``explicit`` specifier, if the *inferred-submodule-declaration* has the ``explicit`` specifier
* Have the ``framework`` specifier, if the
*inferred-submodule-declaration* has the ``framework`` specifier
* Have the attributes specified by the \ *inferred-submodule-declaration*
* Contain a single *header-declaration* naming that header
* Contain a single *export-declaration* ``export *``, if the \ *inferred-submodule-declaration* contains the \ *inferred-submodule-member* ``export *``
**Example**: If the subdirectory "MyLib" contains the headers ``A.h`` and ``B.h``, then the following module map:
.. parsed-literal::
module MyLib {
umbrella "MyLib"
explicit module * {
export *
}
}
is equivalent to the (more verbose) module map:
.. parsed-literal::
module MyLib {
explicit module A {
header "A.h"
export *
}
explicit module B {
header "B.h"
export *
}
}
Export declaration
~~~~~~~~~~~~~~~~~~
An *export-declaration* specifies which imported modules will automatically be re-exported as part of a given module's API.
.. parsed-literal::
*export-declaration*:
``export`` *wildcard-module-id*
*wildcard-module-id*:
*identifier*
'*'
*identifier* '.' *wildcard-module-id*
The *export-declaration* names a module or a set of modules that will be re-exported to any translation unit that imports the enclosing module. Each imported module that matches the *wildcard-module-id* up to, but not including, the first ``*`` will be re-exported.
**Example**: In the following example, importing ``MyLib.Derived`` also provides the API for ``MyLib.Base``:
.. parsed-literal::
module MyLib {
module Base {
header "Base.h"
}
module Derived {
header "Derived.h"
export Base
}
}
Note that, if ``Derived.h`` includes ``Base.h``, one can simply use a wildcard export to re-export everything ``Derived.h`` includes:
.. parsed-literal::
module MyLib {
module Base {
header "Base.h"
}
module Derived {
header "Derived.h"
export *
}
}
.. note::
The wildcard export syntax ``export *`` re-exports all of the
modules that were imported in the actual header file. Because
``#include`` directives are automatically mapped to module imports,
``export *`` provides the same transitive-inclusion behavior
provided by the C preprocessor, e.g., importing a given module
implicitly imports all of the modules on which it depends.
Therefore, liberal use of ``export *`` provides excellent backward
compatibility for programs that rely on transitive inclusion (i.e.,
all of them).
Link declaration
~~~~~~~~~~~~~~~~
A *link-declaration* specifies a library or framework against which a program should be linked if the enclosing module is imported in any translation unit in that program.
.. parsed-literal::
*link-declaration*:
``link`` ``framework``:sub:`opt` *string-literal*
The *string-literal* specifies the name of the library or framework against which the program should be linked. For example, specifying "clangBasic" would instruct the linker to link with ``-lclangBasic`` for a Unix-style linker.
A *link-declaration* with the ``framework`` specifier specifies that the linker should link against the named framework, e.g., with ``-framework MyFramework``.
.. note::
Automatic linking with the ``link`` directive is not yet widely
implemented, because it requires support from both the object file
format and the linker. The notion is similar to Microsoft Visual
Studio's ``#pragma comment(lib...)``.
Configuration macros declaration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The *config-macros-declaration* specifies the set of configuration macros that have an effect on the API of the enclosing module.
.. parsed-literal::
*config-macros-declaration*:
``config_macros`` *attributes*:sub:`opt` *config-macro-list*:sub:`opt`
*config-macro-list*:
*identifier* (',' *identifier*)*
Each *identifier* in the *config-macro-list* specifies the name of a macro. The compiler is required to maintain different variants of the given module for differing definitions of any of the named macros.
A *config-macros-declaration* shall only be present on a top-level module, i.e., a module that is not nested within an enclosing module.
The ``exhaustive`` attribute specifies that the list of macros in the *config-macros-declaration* is exhaustive, meaning that no other macro definition is intended to have an effect on the API of that module.
.. note::
The ``exhaustive`` attribute implies that any macro definitions
for macros not listed as configuration macros should be ignored
completely when building the module. As an optimization, the
compiler could reduce the number of unique module variants by not
considering these non-configuration macros. This optimization is not
yet implemented in Clang.
A translation unit shall not import the same module under different definitions of the configuration macros.
.. note::
Clang implements a weak form of this requirement: the definitions
used for configuration macros are fixed based on the definitions
provided by the command line. If an import occurs and the definition
of any configuration macro has changed, the compiler will produce a
warning (under the control of ``-Wconfig-macros``).
**Example:** A logging library might provide different API (e.g., in the form of different definitions for a logging macro) based on the ``NDEBUG`` macro setting:
.. parsed-literal::
module MyLogger {
umbrella header "MyLogger.h"
config_macros [exhaustive] NDEBUG
}
Conflict declarations
~~~~~~~~~~~~~~~~~~~~~
A *conflict-declaration* describes a case where the presence of two different modules in the same translation unit is likely to cause a problem. For example, two modules may provide similar-but-incompatible functionality.
.. parsed-literal::
*conflict-declaration*:
``conflict`` *module-id* ',' *string-literal*
The *module-id* of the *conflict-declaration* specifies the module with which the enclosing module conflicts. The specified module shall not have been imported in the translation unit when the enclosing module is imported.
The *string-literal* provides a message to be included in the compiler diagnostic when two modules conflict.
.. note::
Clang emits a warning (under the control of ``-Wmodule-conflict``)
when a module conflict is discovered.
**Example:**
.. parsed-literal::
module Conflicts {
explicit module A {
header "conflict_a.h"
conflict B, "we just don't like B"
}
module B {
header "conflict_b.h"
}
}
Attributes
----------
Attributes are used in a number of places in the grammar to describe specific behavior of other declarations. The format of attributes is fairly simple.
.. parsed-literal::
*attributes*:
*attribute* *attributes*:sub:`opt`
*attribute*:
'[' *identifier* ']'
Any *identifier* can be used as an attribute, and each declaration specifies what attributes can be applied to it.
Modularizing a Platform
=======================
To get any benefit out of modules, one needs to introduce module maps for software libraries starting at the bottom of the stack. This typically means introducing a module map covering the operating system's headers and the C standard library headers (in ``/usr/include``, for a Unix system).
The module maps will be written using the `module map language`_, which provides the tools necessary to describe the mapping between headers and modules. Because the set of headers differs from one system to the next, the module map will likely have to be somewhat customized for, e.g., a particular distribution and version of the operating system. Moreover, the system headers themselves may require some modification, if they exhibit any anti-patterns that break modules. Such common patterns are described below.
**Macro-guarded copy-and-pasted definitions**
System headers vend core types such as ``size_t`` for users. These types are often needed in a number of system headers, and are almost trivial to write. Hence, it is fairly common to see a definition such as the following copy-and-pasted throughout the headers:
.. parsed-literal::
#ifndef _SIZE_T
#define _SIZE_T
typedef __SIZE_TYPE__ size_t;
#endif
Unfortunately, when modules compiles all of the C library headers together into a single module, only the first actual type definition of ``size_t`` will be visible, and then only in the submodule corresponding to the lucky first header. Any other headers that have copy-and-pasted versions of this pattern will *not* have a definition of ``size_t``. Importing the submodule corresponding to one of those headers will therefore not yield ``size_t`` as part of the API, because it wasn't there when the header was parsed. The fix for this problem is either to pull the copied declarations into a common header that gets included everywhere ``size_t`` is part of the API, or to eliminate the ``#ifndef`` and redefine the ``size_t`` type. The latter works for C++ headers and C11, but will cause an error for non-modules C90/C99, where redefinition of ``typedefs`` is not permitted.
**Conflicting definitions**
Different system headers may provide conflicting definitions for various macros, functions, or types. These conflicting definitions don't tend to cause problems in a pre-modules world unless someone happens to include both headers in one translation unit. Since the fix is often simply "don't do that", such problems persist. Modules requires that the conflicting definitions be eliminated or that they be placed in separate modules (the former is generally the better answer).
**Missing includes**
Headers are often missing ``#include`` directives for headers that they actually depend on. As with the problem of conflicting definitions, this only affects unlucky users who don't happen to include headers in the right order. With modules, the headers of a particular module will be parsed in isolation, so the module may fail to build if there are missing includes.
**Headers that vend multiple APIs at different times**
Some systems have headers that contain a number of different kinds of API definitions, only some of which are made available with a given include. For example, the header may vend ``size_t`` only when the macro ``__need_size_t`` is defined before that header is included, and also vend ``wchar_t`` only when the macro ``__need_wchar_t`` is defined. Such headers are often included many times in a single translation unit, and will have no include guards. There is no sane way to map this header to a submodule. One can either eliminate the header (e.g., by splitting it into separate headers, one per actual API) or simply ``exclude`` it in the module map.
To detect and help address some of these problems, the ``clang-tools-extra`` repository contains a ``modularize`` tool that parses a set of given headers and attempts to detect these problems and produce a report. See the tool's in-source documentation for information on how to check your system or library headers.
Future Directions
=================
Modules is an experimental feature, and there is much work left to do to make it both real and useful. Here are a few ideas:
**Detect unused module imports**
Unlike with ``#include`` directives, it should be fairly simple to track whether a directly-imported module has ever been used. By doing so, Clang can emit ``unused import`` or ``unused #include`` diagnostics, including Fix-Its to remove the useless imports/includes.
**Fix-Its for missing imports**
It's fairly common for one to make use of some API while writing code, only to get a compiler error about "unknown type" or "no function named" because the corresponding header has not been included. Clang should detect such cases and auto-import the required module (with a Fix-It!).
**Improve modularize**
The modularize tool is both extremely important (for deployment) and extremely crude. It needs better UI, better detection of problems (especially for C++), and perhaps an assistant mode to help write module maps for you.
**C++ Support**
Modules clearly has to work for C++, or we'll never get to use it for the Clang code base.
Where To Learn More About Modules
=================================
The Clang source code provides additional information about modules:
``clang/lib/Headers/module.map``
Module map for Clang's compiler-specific header files.
``clang/test/Modules/``
Tests specifically related to modules functionality.
``clang/include/clang/Basic/Module.h``
The ``Module`` class in this header describes a module, and is used throughout the compiler to implement modules.
``clang/include/clang/Lex/ModuleMap.h``
The ``ModuleMap`` class in this header describes the full module map, consisting of all of the module map files that have been parsed, and providing facilities for looking up module maps and mapping between modules and headers (in both directions).
PCHInternals_
Information about the serialized AST format used for precompiled headers and modules. The actual implementation is in the ``clangSerialization`` library.
.. [#] Automatic linking against the libraries of modules requires specific linker support, which is not widely available.
.. [#] Modules are only available in C and Objective-C; a separate flag ``-fcxx-modules`` enables modules support for C++, which is even more experimental and broken.
.. [#] There are certain anti-patterns that occur in headers, particularly system headers, that cause problems for modules. The section `Modularizing a Platform`_ describes some of them.
.. [#] The second instance is actually a new thread within the current process, not a separate process. However, the original compiler instance is blocked on the execution of this thread.
.. [#] The preprocessing context in which the modules are parsed is actually dependent on the command-line options provided to the compiler, including the language dialect and any ``-D`` options. However, the compiled modules for different command-line options are kept distinct, and any preprocessor directives that occur within the translation unit are ignored. See the section on the `Configuration macros declaration`_ for more information.
.. _PCHInternals: PCHInternals.html

View File

@ -1,423 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Objective-C Literals</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
th { background-color: #ffddaa; }
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Objective-C Literals</h1>
<h2>Introduction</h2>
<p>Three new features were introduced into clang at the same time: <i>NSNumber Literals</i> provide a syntax for creating <code>NSNumber</code> from scalar literal expressions; <i>Collection Literals</i> provide a short-hand for creating arrays and dictionaries; <i>Object Subscripting</i> provides a way to use subscripting with Objective-C objects. Users of Apple compiler releases can use these features starting with the Apple LLVM Compiler 4.0. Users of open-source LLVM.org compiler releases can use these features starting with clang v3.1.</p>
<p>These language additions simplify common Objective-C programming patterns, make programs more concise, and improve the safety of container creation.</p>
<p>This document describes how the features are implemented in clang, and how to use them in your own programs.</p>
<h2>NSNumber Literals</h2>
<p>The framework class <code>NSNumber</code> is used to wrap scalar values inside objects: signed and unsigned integers (<code>char</code>, <code>short</code>, <code>int</code>, <code>long</code>, <code>long long</code>), floating point numbers (<code>float</code>, <code>double</code>), and boolean values (<code>BOOL</code>, C++ <code>bool</code>). Scalar values wrapped in objects are also known as <i>boxed</i> values.</p>
<p>In Objective-C, any character, numeric or boolean literal prefixed with the <code>'@'</code> character will evaluate to a pointer to an <code>NSNumber</code> object initialized with that value. C's type suffixes may be used to control the size of numeric literals.</p>
<h3>Examples</h3>
<p>The following program illustrates the rules for <code>NSNumber</code> literals:</p>
<pre>
void main(int argc, const char *argv[]) {
// character literals.
NSNumber *theLetterZ = @'Z'; // equivalent to [NSNumber numberWithChar:'Z']
// integral literals.
NSNumber *fortyTwo = @42; // equivalent to [NSNumber numberWithInt:42]
NSNumber *fortyTwoUnsigned = @42U; // equivalent to [NSNumber numberWithUnsignedInt:42U]
NSNumber *fortyTwoLong = @42L; // equivalent to [NSNumber numberWithLong:42L]
NSNumber *fortyTwoLongLong = @42LL; // equivalent to [NSNumber numberWithLongLong:42LL]
// floating point literals.
NSNumber *piFloat = @3.141592654F; // equivalent to [NSNumber numberWithFloat:3.141592654F]
NSNumber *piDouble = @3.1415926535; // equivalent to [NSNumber numberWithDouble:3.1415926535]
// BOOL literals.
NSNumber *yesNumber = @YES; // equivalent to [NSNumber numberWithBool:YES]
NSNumber *noNumber = @NO; // equivalent to [NSNumber numberWithBool:NO]
#ifdef __cplusplus
NSNumber *trueNumber = @true; // equivalent to [NSNumber numberWithBool:(BOOL)true]
NSNumber *falseNumber = @false; // equivalent to [NSNumber numberWithBool:(BOOL)false]
#endif
}
</pre>
<h3>Discussion</h3>
<p>NSNumber literals only support literal scalar values after the <code>'@'</code>. Consequently, <code>@INT_MAX</code> works, but <code>@INT_MIN</code> does not, because they are defined like this:</p>
<pre>
#define INT_MAX 2147483647 /* max value for an int */
#define INT_MIN (-2147483647-1) /* min value for an int */
</pre>
<p>The definition of <code>INT_MIN</code> is not a simple literal, but a parenthesized expression. Parenthesized
expressions are supported using the <a href="#objc_boxed_expressions">boxed expression</a> syntax, which is described in the next section.</p>
<p>Because <code>NSNumber</code> does not currently support wrapping <code>long double</code> values, the use of a <code>long double NSNumber</code> literal (e.g. <code>@123.23L</code>) will be rejected by the compiler.</p>
<p>Previously, the <code>BOOL</code> type was simply a typedef for <code>signed char</code>, and <code>YES</code> and <code>NO</code> were macros that expand to <code>(BOOL)1</code> and <code>(BOOL)0</code> respectively. To support <code>@YES</code> and <code>@NO</code> expressions, these macros are now defined using new language keywords in <code>&lt;objc/objc.h&gt;</code>:</p>
<pre>
#if __has_feature(objc_bool)
#define YES __objc_yes
#define NO __objc_no
#else
#define YES ((BOOL)1)
#define NO ((BOOL)0)
#endif
</pre>
<p>The compiler implicitly converts <code>__objc_yes</code> and <code>__objc_no</code> to <code>(BOOL)1</code> and <code>(BOOL)0</code>. The keywords are used to disambiguate <code>BOOL</code> and integer literals.</p>
<p>Objective-C++ also supports <code>@true</code> and <code>@false</code> expressions, which are equivalent to <code>@YES</code> and <code>@NO</code>.</p>
<!-- ======================================================================= -->
<h2 id="objc_boxed_expressions">Boxed Expressions</h2>
<!-- ======================================================================= -->
<p>Objective-C provides a new syntax for boxing C expressions:</p>
<pre>
<code>@( <em>expression</em> )</code>
</pre>
<p>Expressions of scalar (numeric, enumerated, BOOL) and C string pointer types
are supported:</p>
<pre>
// numbers.
NSNumber *smallestInt = @(-INT_MAX - 1); // [NSNumber numberWithInt:(-INT_MAX - 1)]
NSNumber *piOverTwo = @(M_PI / 2); // [NSNumber numberWithDouble:(M_PI / 2)]
// enumerated types.
typedef enum { Red, Green, Blue } Color;
NSNumber *favoriteColor = @(Green); // [NSNumber numberWithInt:((int)Green)]
// strings.
NSString *path = @(getenv("PATH")); // [NSString stringWithUTF8String:(getenv("PATH"))]
NSArray *pathComponents = [path componentsSeparatedByString:@":"];
</pre>
<h3>Boxed Enums</h3>
<p>
Cocoa frameworks frequently define constant values using <em>enums.</em> Although enum values are integral, they may not be used directly as boxed literals (this avoids conflicts with future <code>'@'</code>-prefixed Objective-C keywords). Instead, an enum value must be placed inside a boxed expression. The following example demonstrates configuring an <code>AVAudioRecorder</code> using a dictionary that contains a boxed enumeration value:
</p>
<pre>
enum {
AVAudioQualityMin = 0,
AVAudioQualityLow = 0x20,
AVAudioQualityMedium = 0x40,
AVAudioQualityHigh = 0x60,
AVAudioQualityMax = 0x7F
};
- (AVAudioRecorder *)recordToFile:(NSURL *)fileURL {
NSDictionary *settings = @{ AVEncoderAudioQualityKey : @(AVAudioQualityMax) };
return [[AVAudioRecorder alloc] initWithURL:fileURL settings:settings error:NULL];
}
</pre>
<p>
The expression <code>@(AVAudioQualityMax)</code> converts <code>AVAudioQualityMax</code> to an integer type, and boxes the value accordingly. If the enum has a <a href="http://clang.llvm.org/docs/LanguageExtensions.html#objc_fixed_enum">fixed underlying type</a> as in:
</p>
<pre>
typedef enum : unsigned char { Red, Green, Blue } Color;
NSNumber *red = @(Red), *green = @(Green), *blue = @(Blue); // => [NSNumber numberWithUnsignedChar:]
</pre>
<p>
then the fixed underlying type will be used to select the correct <code>NSNumber</code> creation method.
</p>
<p>
Boxing a value of enum type will result in a <code>NSNumber</code> pointer with a creation method according to the underlying type of the enum,
which can be a <a href="http://clang.llvm.org/docs/LanguageExtensions.html#objc_fixed_enum">fixed underlying type</a> or a compiler-defined
integer type capable of representing the values of all the members of the enumeration:
</p>
<pre>
typedef enum : unsigned char { Red, Green, Blue } Color;
Color col = Red;
NSNumber *nsCol = @(col); // => [NSNumber numberWithUnsignedChar:]
</pre>
<h3>Boxed C Strings</h3>
<p>
A C string literal prefixed by the <code>'@'</code> token denotes an <code>NSString</code> literal in the same way a numeric literal prefixed by the <code>'@'</code> token denotes an <code>NSNumber</code> literal. When the type of the parenthesized expression is <code>(char *)</code> or <code>(const char *)</code>, the result of the boxed expression is a pointer to an <code>NSString</code> object containing equivalent character data, which is assumed to be '\0'-terminated and UTF-8 encoded. The following example converts C-style command line arguments into <code>NSString</code> objects.
</p>
<pre>
// Partition command line arguments into positional and option arguments.
NSMutableArray *args = [NSMutableArray new];
NSMutableDictionary *options = [NSMutableDictionary new];
while (--argc) {
const char *arg = *++argv;
if (strncmp(arg, "--", 2) == 0) {
options[@(arg + 2)] = @(*++argv); // --key value
} else {
[args addObject:@(arg)]; // positional argument
}
}
</pre>
<p>
As with all C pointers, character pointer expressions can involve arbitrary pointer arithmetic, therefore programmers must ensure that the character data is valid. Passing <code>NULL</code> as the character pointer will raise an exception at runtime. When possible, the compiler will reject <code>NULL</code> character pointers used in boxed expressions.
</p>
<h3>Availability</h3>
<p>Boxed expressions will be available in clang 3.2. It is not currently available in any Apple compiler.</p>
<h2>Container Literals</h2>
<p>Objective-C now supports a new expression syntax for creating immutable array and dictionary container objects.</p>
<h3>Examples</h3>
<p>Immutable array expression:</p>
<pre>
NSArray *array = @[ @"Hello", NSApp, [NSNumber numberWithInt:42] ];
</pre>
<p>This creates an <code>NSArray</code> with 3 elements. The comma-separated sub-expressions of an array literal can be any Objective-C object pointer typed expression.</p>
<p>Immutable dictionary expression:</p>
<pre>
NSDictionary *dictionary = @{
@"name" : NSUserName(),
@"date" : [NSDate date],
@"processInfo" : [NSProcessInfo processInfo]
};
</pre>
<p>This creates an <code>NSDictionary</code> with 3 key/value pairs. Value sub-expressions of a dictionary literal must be Objective-C object pointer typed, as in array literals. Key sub-expressions must be of an Objective-C object pointer type that implements the <code>&lt;NSCopying&gt;</code> protocol.</p>
<h3>Discussion</h3>
<p>Neither keys nor values can have the value <code>nil</code> in containers. If the compiler can prove that a key or value is <code>nil</code> at compile time, then a warning will be emitted. Otherwise, a runtime error will occur.</p>
<p>Using array and dictionary literals is safer than the variadic creation forms commonly in use today. Array literal expressions expand to calls to <code>+[NSArray arrayWithObjects:count:]</code>, which validates that all objects are non-<code>nil</code>. The variadic form, <code>+[NSArray arrayWithObjects:]</code> uses <code>nil</code> as an argument list terminator, which can lead to malformed array objects. Dictionary literals are similarly created with <code>+[NSDictionary dictionaryWithObjects:forKeys:count:]</code> which validates all objects and keys, unlike <code>+[NSDictionary dictionaryWithObjectsAndKeys:]</code> which also uses a <code>nil</code> parameter as an argument list terminator.</p>
<h2>Object Subscripting</h2>
<p>Objective-C object pointer values can now be used with C's subscripting operator.</p>
<h3>Examples</h3>
<p>The following code demonstrates the use of object subscripting syntax with <code>NSMutableArray</code> and <code>NSMutableDictionary</code> objects:</p>
<pre>
NSMutableArray *array = ...;
NSUInteger idx = ...;
id newObject = ...;
id oldObject = array[idx];
array[idx] = newObject; // replace oldObject with newObject
NSMutableDictionary *dictionary = ...;
NSString *key = ...;
oldObject = dictionary[key];
dictionary[key] = newObject; // replace oldObject with newObject
</pre>
<p>The next section explains how subscripting expressions map to accessor methods.</p>
<h3>Subscripting Methods</h3>
<p>Objective-C supports two kinds of subscript expressions: <i>array-style</i> subscript expressions use integer typed subscripts; <i>dictionary-style</i> subscript expressions use Objective-C object pointer typed subscripts. Each type of subscript expression is mapped to a message send using a predefined selector. The advantage of this design is flexibility: class designers are free to introduce subscripting by declaring methods or by adopting protocols. Moreover, because the method names are selected by the type of the subscript, an object can be subscripted using both array and dictionary styles.</p>
<h4>Array-Style Subscripting</h4>
<p>When the subscript operand has an integral type, the expression is rewritten to use one of two different selectors, depending on whether the element is being read or written. When an expression reads an element using an integral index, as in the following example:</p>
<pre>
NSUInteger idx = ...;
id value = object[idx];
</pre>
<p>it is translated into a call to <code>objectAtIndexedSubscript:</code></p>
<pre>
id value = [object objectAtIndexedSubscript:idx];
</pre>
<p>When an expression writes an element using an integral index:</p>
<pre>
object[idx] = newValue;
</pre>
<p>it is translated to a call to <code>setObject:atIndexedSubscript:</code></p>
<pre>
[object setObject:newValue atIndexedSubscript:idx];
</pre>
<p>These message sends are then type-checked and performed just like explicit message sends. The method used for objectAtIndexedSubscript: must be declared with an argument of integral type and a return value of some Objective-C object pointer type. The method used for setObject:atIndexedSubscript: must be declared with its first argument having some Objective-C pointer type and its second argument having integral type.</p>
<p>The meaning of indexes is left up to the declaring class. The compiler will coerce the index to the appropriate argument type of the method it uses for type-checking. For an instance of <code>NSArray</code>, reading an element using an index outside the range <code>[0, array.count)</code> will raise an exception. For an instance of <code>NSMutableArray</code>, assigning to an element using an index within this range will replace that element, but assigning to an element using an index outside this range will raise an exception; no syntax is provided for inserting, appending, or removing elements for mutable arrays.</p>
<p>A class need not declare both methods in order to take advantage of this language feature. For example, the class <code>NSArray</code> declares only <code>objectAtIndexedSubscript:</code>, so that assignments to elements will fail to type-check; moreover, its subclass <code>NSMutableArray</code> declares <code>setObject:atIndexedSubscript:</code>.</p>
<h4>Dictionary-Style Subscripting</h4>
<p>When the subscript operand has an Objective-C object pointer type, the expression is rewritten to use one of two different selectors, depending on whether the element is being read from or written to. When an expression reads an element using an Objective-C object pointer subscript operand, as in the following example:</p>
<pre>
id key = ...;
id value = object[key];
</pre>
<p>it is translated into a call to the <code>objectForKeyedSubscript:</code> method:</p>
<pre>
id value = [object objectForKeyedSubscript:key];
</pre>
<p>When an expression writes an element using an Objective-C object pointer subscript:</p>
<pre>
object[key] = newValue;
</pre>
<p>it is translated to a call to <code>setObject:forKeyedSubscript:</code></p>
<pre>
[object setObject:newValue forKeyedSubscript:key];
</pre>
<p>The behavior of <code>setObject:forKeyedSubscript:</code> is class-specific; but in general it should replace an existing value if one is already associated with a key, otherwise it should add a new value for the key. No syntax is provided for removing elements from mutable dictionaries.</p>
<h3>Discussion</h3>
<p>An Objective-C subscript expression occurs when the base operand of the C subscript operator has an Objective-C object pointer type. Since this potentially collides with pointer arithmetic on the value, these expressions are only supported under the modern Objective-C runtime, which categorically forbids such arithmetic.</p>
<p>Currently, only subscripts of integral or Objective-C object pointer type are supported. In C++, a class type can be used if it has a single conversion function to an integral or Objective-C pointer type, in which case that conversion is applied and analysis continues as appropriate. Otherwise, the expression is ill-formed.</p>
<p>An Objective-C object subscript expression is always an l-value. If the expression appears on the left-hand side of a simple assignment operator (=), the element is written as described below. If the expression appears on the left-hand side of a compound assignment operator (e.g. +=), the program is ill-formed, because the result of reading an element is always an Objective-C object pointer and no binary operators are legal on such pointers. If the expression appears in any other position, the element is read as described below. It is an error to take the address of a subscript expression, or (in C++) to bind a reference to it.</p>
<p>Programs can use object subscripting with Objective-C object pointers of type <code>id</code>. Normal dynamic message send rules apply; the compiler must see <i>some</i> declaration of the subscripting methods, and will pick the declaration seen first.</p>
<h2>Caveats</h2>
<p>Objects created using the literal or boxed expression syntax are not guaranteed to be uniqued by the runtime, but nor are they guaranteed to be newly-allocated. As such, the result of performing direct comparisons against the location of an object literal (using <code>==</code>, <code>!=</code>, <code>&lt;</code>, <code>&lt;=</code>, <code>&gt;</code>, or <code>&gt;=</code>) is not well-defined. This is usually a simple mistake in code that intended to call the <code>isEqual:</code> method (or the <code>compare:</code> method).</p>
<p>This caveat applies to compile-time string literals as well. Historically, string literals (using the <code>@"..."</code> syntax) have been uniqued across translation units during linking. This is an implementation detail of the compiler and should not be relied upon. If you are using such code, please use global string constants instead (<code>NSString * const MyConst = @"..."</code>) or use <code>isEqual:</code>.</p>
<h2>Grammar Additions</h2>
<p>To support the new syntax described above, the Objective-C <code>@</code>-expression grammar has the following new productions:</p>
<pre>
objc-at-expression : '@' (string-literal | encode-literal | selector-literal | protocol-literal | object-literal)
;
object-literal : ('+' | '-')? numeric-constant
| character-constant
| boolean-constant
| array-literal
| dictionary-literal
;
boolean-constant : '__objc_yes' | '__objc_no' | 'true' | 'false' /* boolean keywords. */
;
array-literal : '[' assignment-expression-list ']'
;
assignment-expression-list : assignment-expression (',' assignment-expression-list)?
| /* empty */
;
dictionary-literal : '{' key-value-list '}'
;
key-value-list : key-value-pair (',' key-value-list)?
| /* empty */
;
key-value-pair : assignment-expression ':' assignment-expression
;
</pre>
<p>Note: <code>@true</code> and <code>@false</code> are only supported in Objective-C++.</p>
<h2>Availability Checks</h2>
<p>Programs test for the new features by using clang's __has_feature checks. Here are examples of their use:</p>
<pre>
#if __has_feature(objc_array_literals)
// new way.
NSArray *elements = @[ @"H", @"He", @"O", @"C" ];
#else
// old way (equivalent).
id objects[] = { @"H", @"He", @"O", @"C" };
NSArray *elements = [NSArray arrayWithObjects:objects count:4];
#endif
#if __has_feature(objc_dictionary_literals)
// new way.
NSDictionary *masses = @{ @"H" : @1.0078, @"He" : @4.0026, @"O" : @15.9990, @"C" : @12.0096 };
#else
// old way (equivalent).
id keys[] = { @"H", @"He", @"O", @"C" };
id values[] = { [NSNumber numberWithDouble:1.0078], [NSNumber numberWithDouble:4.0026],
[NSNumber numberWithDouble:15.9990], [NSNumber numberWithDouble:12.0096] };
NSDictionary *masses = [NSDictionary dictionaryWithObjects:values forKeys:keys count:4];
#endif
#if __has_feature(objc_subscripting)
NSUInteger i, count = elements.count;
for (i = 0; i < count; ++i) {
NSString *element = elements[i];
NSNumber *mass = masses[element];
NSLog(@"the mass of %@ is %@", element, mass);
}
#else
NSUInteger i, count = [elements count];
for (i = 0; i < count; ++i) {
NSString *element = [elements objectAtIndex:i];
NSNumber *mass = [masses objectForKey:element];
NSLog(@"the mass of %@ is %@", element, mass);
}
#endif
</pre>
<p>Code can also use <code>__has_feature(objc_bool)</code> to check for the availability of numeric literals support. This checks for the new <code>__objc_yes / __objc_no</code> keywords, which enable the use of <code>@YES / @NO</code> literals.</p>
<p>To check whether boxed expressions are supported, use the <code>__has_feature(objc_boxed_expressions)</code> feature macro.</p>
</div>
</body>
</html>

554
docs/ObjectiveCLiterals.rst Normal file
View File

@ -0,0 +1,554 @@
====================
Objective-C Literals
====================
Introduction
============
Three new features were introduced into clang at the same time:
*NSNumber Literals* provide a syntax for creating ``NSNumber`` from
scalar literal expressions; *Collection Literals* provide a short-hand
for creating arrays and dictionaries; *Object Subscripting* provides a
way to use subscripting with Objective-C objects. Users of Apple
compiler releases can use these features starting with the Apple LLVM
Compiler 4.0. Users of open-source LLVM.org compiler releases can use
these features starting with clang v3.1.
These language additions simplify common Objective-C programming
patterns, make programs more concise, and improve the safety of
container creation.
This document describes how the features are implemented in clang, and
how to use them in your own programs.
NSNumber Literals
=================
The framework class ``NSNumber`` is used to wrap scalar values inside
objects: signed and unsigned integers (``char``, ``short``, ``int``,
``long``, ``long long``), floating point numbers (``float``,
``double``), and boolean values (``BOOL``, C++ ``bool``). Scalar values
wrapped in objects are also known as *boxed* values.
In Objective-C, any character, numeric or boolean literal prefixed with
the ``'@'`` character will evaluate to a pointer to an ``NSNumber``
object initialized with that value. C's type suffixes may be used to
control the size of numeric literals.
Examples
--------
The following program illustrates the rules for ``NSNumber`` literals:
.. code-block:: objc
void main(int argc, const char *argv[]) {
// character literals.
NSNumber *theLetterZ = @'Z'; // equivalent to [NSNumber numberWithChar:'Z']
// integral literals.
NSNumber *fortyTwo = @42; // equivalent to [NSNumber numberWithInt:42]
NSNumber *fortyTwoUnsigned = @42U; // equivalent to [NSNumber numberWithUnsignedInt:42U]
NSNumber *fortyTwoLong = @42L; // equivalent to [NSNumber numberWithLong:42L]
NSNumber *fortyTwoLongLong = @42LL; // equivalent to [NSNumber numberWithLongLong:42LL]
// floating point literals.
NSNumber *piFloat = @3.141592654F; // equivalent to [NSNumber numberWithFloat:3.141592654F]
NSNumber *piDouble = @3.1415926535; // equivalent to [NSNumber numberWithDouble:3.1415926535]
// BOOL literals.
NSNumber *yesNumber = @YES; // equivalent to [NSNumber numberWithBool:YES]
NSNumber *noNumber = @NO; // equivalent to [NSNumber numberWithBool:NO]
#ifdef __cplusplus
NSNumber *trueNumber = @true; // equivalent to [NSNumber numberWithBool:(BOOL)true]
NSNumber *falseNumber = @false; // equivalent to [NSNumber numberWithBool:(BOOL)false]
#endif
}
Discussion
----------
NSNumber literals only support literal scalar values after the ``'@'``.
Consequently, ``@INT_MAX`` works, but ``@INT_MIN`` does not, because
they are defined like this:
.. code-block:: objc
#define INT_MAX 2147483647 /* max value for an int */
#define INT_MIN (-2147483647-1) /* min value for an int */
The definition of ``INT_MIN`` is not a simple literal, but a
parenthesized expression. Parenthesized expressions are supported using
the `boxed expression <Boxed Expressions_>`_ syntax, which is
described in the next section.
Because ``NSNumber`` does not currently support wrapping ``long double``
values, the use of a ``long double NSNumber`` literal (e.g.
``@123.23L``) will be rejected by the compiler.
Previously, the ``BOOL`` type was simply a typedef for ``signed char``,
and ``YES`` and ``NO`` were macros that expand to ``(BOOL)1`` and
``(BOOL)0`` respectively. To support ``@YES`` and ``@NO`` expressions,
these macros are now defined using new language keywords in
``<objc/objc.h>``:
.. code-block:: objc
#if __has_feature(objc_bool)
#define YES __objc_yes
#define NO __objc_no
#else
#define YES ((BOOL)1)
#define NO ((BOOL)0)
#endif
The compiler implicitly converts ``__objc_yes`` and ``__objc_no`` to
``(BOOL)1`` and ``(BOOL)0``. The keywords are used to disambiguate
``BOOL`` and integer literals.
Objective-C++ also supports ``@true`` and ``@false`` expressions, which
are equivalent to ``@YES`` and ``@NO``.
Boxed Expressions
=================
Objective-C provides a new syntax for boxing C expressions:
.. code-block:: objc
@( <expression> )
Expressions of scalar (numeric, enumerated, BOOL) and C string pointer
types are supported:
.. code-block:: objc
// numbers.
NSNumber *smallestInt = @(-INT_MAX - 1); // [NSNumber numberWithInt:(-INT_MAX - 1)]
NSNumber *piOverTwo = @(M_PI / 2); // [NSNumber numberWithDouble:(M_PI / 2)]
// enumerated types.
typedef enum { Red, Green, Blue } Color;
NSNumber *favoriteColor = @(Green); // [NSNumber numberWithInt:((int)Green)]
// strings.
NSString *path = @(getenv("PATH")); // [NSString stringWithUTF8String:(getenv("PATH"))]
NSArray *pathComponents = [path componentsSeparatedByString:@":"];
Boxed Enums
-----------
Cocoa frameworks frequently define constant values using *enums*.
Although enum values are integral, they may not be used directly as
boxed literals (this avoids conflicts with future ``'@'``-prefixed
Objective-C keywords). Instead, an enum value must be placed inside a
boxed expression. The following example demonstrates configuring an
``AVAudioRecorder`` using a dictionary that contains a boxed enumeration
value:
.. code-block:: objc
enum {
AVAudioQualityMin = 0,
AVAudioQualityLow = 0x20,
AVAudioQualityMedium = 0x40,
AVAudioQualityHigh = 0x60,
AVAudioQualityMax = 0x7F
};
- (AVAudioRecorder *)recordToFile:(NSURL *)fileURL {
NSDictionary *settings = @{ AVEncoderAudioQualityKey : @(AVAudioQualityMax) };
return [[AVAudioRecorder alloc] initWithURL:fileURL settings:settings error:NULL];
}
The expression ``@(AVAudioQualityMax)`` converts ``AVAudioQualityMax``
to an integer type, and boxes the value accordingly. If the enum has a
:ref:`fixed underlying type <objc-fixed-enum>` as in:
.. code-block:: objc
typedef enum : unsigned char { Red, Green, Blue } Color;
NSNumber *red = @(Red), *green = @(Green), *blue = @(Blue); // => [NSNumber numberWithUnsignedChar:]
then the fixed underlying type will be used to select the correct
``NSNumber`` creation method.
Boxing a value of enum type will result in an ``NSNumber`` pointer with a
creation method according to the underlying type of the enum, which can
be a :ref:`fixed underlying type <objc-fixed-enum>`
or a compiler-defined integer type capable of representing the values of
all the members of the enumeration:
.. code-block:: objc
typedef enum : unsigned char { Red, Green, Blue } Color;
Color col = Red;
NSNumber *nsCol = @(col); // => [NSNumber numberWithUnsignedChar:]
Boxed C Strings
---------------
A C string literal prefixed by the ``'@'`` token denotes an ``NSString``
literal in the same way a numeric literal prefixed by the ``'@'`` token
denotes an ``NSNumber`` literal. When the type of the parenthesized
expression is ``(char *)`` or ``(const char *)``, the result of the
boxed expression is a pointer to an ``NSString`` object containing
equivalent character data, which is assumed to be '\\0'-terminated and
UTF-8 encoded. The following example converts C-style command line
arguments into ``NSString`` objects.
.. code-block:: objc
// Partition command line arguments into positional and option arguments.
NSMutableArray *args = [NSMutableArray new];
NSMutableDictionary *options = [NSMutableDictionary new];
while (--argc) {
const char *arg = *++argv;
if (strncmp(arg, "--", 2) == 0) {
options[@(arg + 2)] = @(*++argv); // --key value
} else {
[args addObject:@(arg)]; // positional argument
}
}
As with all C pointers, character pointer expressions can involve
arbitrary pointer arithmetic, therefore programmers must ensure that the
character data is valid. Passing ``NULL`` as the character pointer will
raise an exception at runtime. When possible, the compiler will reject
``NULL`` character pointers used in boxed expressions.
Availability
------------
Boxed expressions will be available in clang 3.2. They are not currently
available in any Apple compiler.
Container Literals
==================
Objective-C now supports a new expression syntax for creating immutable
array and dictionary container objects.
Examples
--------
Immutable array expression:
.. code-block:: objc
NSArray *array = @[ @"Hello", NSApp, [NSNumber numberWithInt:42] ];
This creates an ``NSArray`` with 3 elements. The comma-separated
sub-expressions of an array literal can be any Objective-C object
pointer typed expression.
Immutable dictionary expression:
.. code-block:: objc
NSDictionary *dictionary = @{
@"name" : NSUserName(),
@"date" : [NSDate date],
@"processInfo" : [NSProcessInfo processInfo]
};
This creates an ``NSDictionary`` with 3 key/value pairs. Value
sub-expressions of a dictionary literal must be Objective-C object
pointer typed, as in array literals. Key sub-expressions must be of an
Objective-C object pointer type that implements the
``<NSCopying>`` protocol.
Discussion
----------
Neither keys nor values can have the value ``nil`` in containers. If the
compiler can prove that a key or value is ``nil`` at compile time, then
a warning will be emitted. Otherwise, a runtime error will occur.
Using array and dictionary literals is safer than the variadic creation
forms commonly in use today. Array literal expressions expand to calls
to ``+[NSArray arrayWithObjects:count:]``, which validates that all
objects are non-``nil``. The variadic form,
``+[NSArray arrayWithObjects:]`` uses ``nil`` as an argument list
terminator, which can lead to malformed array objects. Dictionary
literals are similarly created with
``+[NSDictionary dictionaryWithObjects:forKeys:count:]`` which validates
all objects and keys, unlike
``+[NSDictionary dictionaryWithObjectsAndKeys:]`` which also uses a
``nil`` parameter as an argument list terminator.
Object Subscripting
===================
Objective-C object pointer values can now be used with C's subscripting
operator.
Examples
--------
The following code demonstrates the use of object subscripting syntax
with ``NSMutableArray`` and ``NSMutableDictionary`` objects:
.. code-block:: objc
NSMutableArray *array = ...;
NSUInteger idx = ...;
id newObject = ...;
id oldObject = array[idx];
array[idx] = newObject; // replace oldObject with newObject
NSMutableDictionary *dictionary = ...;
NSString *key = ...;
oldObject = dictionary[key];
dictionary[key] = newObject; // replace oldObject with newObject
The next section explains how subscripting expressions map to accessor
methods.
Subscripting Methods
--------------------
Objective-C supports two kinds of subscript expressions: *array-style*
subscript expressions use integer typed subscripts; *dictionary-style*
subscript expressions use Objective-C object pointer typed subscripts.
Each type of subscript expression is mapped to a message send using a
predefined selector. The advantage of this design is flexibility: class
designers are free to introduce subscripting by declaring methods or by
adopting protocols. Moreover, because the method names are selected by
the type of the subscript, an object can be subscripted using both array
and dictionary styles.
Array-Style Subscripting
^^^^^^^^^^^^^^^^^^^^^^^^
When the subscript operand has an integral type, the expression is
rewritten to use one of two different selectors, depending on whether
the element is being read or written. When an expression reads an
element using an integral index, as in the following example:
.. code-block:: objc
NSUInteger idx = ...;
id value = object[idx];
it is translated into a call to ``objectAtIndexedSubscript:``
.. code-block:: objc
id value = [object objectAtIndexedSubscript:idx];
When an expression writes an element using an integral index:
.. code-block:: objc
object[idx] = newValue;
it is translated to a call to ``setObject:atIndexedSubscript:``
.. code-block:: objc
[object setObject:newValue atIndexedSubscript:idx];
These message sends are then type-checked and performed just like
explicit message sends. The method used for objectAtIndexedSubscript:
must be declared with an argument of integral type and a return value of
some Objective-C object pointer type. The method used for
setObject:atIndexedSubscript: must be declared with its first argument
having some Objective-C pointer type and its second argument having
integral type.
The meaning of indexes is left up to the declaring class. The compiler
will coerce the index to the appropriate argument type of the method it
uses for type-checking. For an instance of ``NSArray``, reading an
element using an index outside the range ``[0, array.count)`` will raise
an exception. For an instance of ``NSMutableArray``, assigning to an
element using an index within this range will replace that element, but
assigning to an element using an index outside this range will raise an
exception; no syntax is provided for inserting, appending, or removing
elements for mutable arrays.
A class need not declare both methods in order to take advantage of this
language feature. For example, the class ``NSArray`` declares only
``objectAtIndexedSubscript:``, so that assignments to elements will fail
to type-check; moreover, its subclass ``NSMutableArray`` declares
``setObject:atIndexedSubscript:``.
Dictionary-Style Subscripting
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
When the subscript operand has an Objective-C object pointer type, the
expression is rewritten to use one of two different selectors, depending
on whether the element is being read from or written to. When an
expression reads an element using an Objective-C object pointer
subscript operand, as in the following example:
.. code-block:: objc
id key = ...;
id value = object[key];
it is translated into a call to the ``objectForKeyedSubscript:`` method:
.. code-block:: objc
id value = [object objectForKeyedSubscript:key];
When an expression writes an element using an Objective-C object pointer
subscript:
.. code-block:: objc
object[key] = newValue;
it is translated to a call to ``setObject:forKeyedSubscript:``
.. code-block:: objc
[object setObject:newValue forKeyedSubscript:key];
The behavior of ``setObject:forKeyedSubscript:`` is class-specific; but
in general it should replace an existing value if one is already
associated with a key, otherwise it should add a new value for the key.
No syntax is provided for removing elements from mutable dictionaries.
Discussion
----------
An Objective-C subscript expression occurs when the base operand of the
C subscript operator has an Objective-C object pointer type. Since this
potentially collides with pointer arithmetic on the value, these
expressions are only supported under the modern Objective-C runtime,
which categorically forbids such arithmetic.
Currently, only subscripts of integral or Objective-C object pointer
type are supported. In C++, a class type can be used if it has a single
conversion function to an integral or Objective-C pointer type, in which
case that conversion is applied and analysis continues as appropriate.
Otherwise, the expression is ill-formed.
An Objective-C object subscript expression is always an l-value. If the
expression appears on the left-hand side of a simple assignment operator
(=), the element is written as described above. If the expression
appears on the left-hand side of a compound assignment operator (e.g.
+=), the program is ill-formed, because the result of reading an element
is always an Objective-C object pointer and no binary operators are
legal on such pointers. If the expression appears in any other position,
the element is read as described above. It is an error to take the
address of a subscript expression, or (in C++) to bind a reference to
it.
Programs can use object subscripting with Objective-C object pointers of
type ``id``. Normal dynamic message send rules apply; the compiler must
see *some* declaration of the subscripting methods, and will pick the
declaration seen first.
Caveats
=======
Objects created using the literal or boxed expression syntax are not
guaranteed to be uniqued by the runtime, but nor are they guaranteed to
be newly-allocated. As such, the result of performing direct comparisons
against the location of an object literal (using ``==``, ``!=``, ``<``,
``<=``, ``>``, or ``>=``) is not well-defined. This is usually a simple
mistake in code that intended to call the ``isEqual:`` method (or the
``compare:`` method).
This caveat applies to compile-time string literals as well.
Historically, string literals (using the ``@"..."`` syntax) have been
uniqued across translation units during linking. This is an
implementation detail of the compiler and should not be relied upon. If
you are using such code, please use global string constants instead
(``NSString * const MyConst = @"..."``) or use ``isEqual:``.
Grammar Additions
=================
To support the new syntax described above, the Objective-C
``@``-expression grammar has the following new productions:
::
objc-at-expression : '@' (string-literal | encode-literal | selector-literal | protocol-literal | object-literal)
;
object-literal : ('+' | '-')? numeric-constant
| character-constant
| boolean-constant
| array-literal
| dictionary-literal
;
boolean-constant : '__objc_yes' | '__objc_no' | 'true' | 'false' /* boolean keywords. */
;
array-literal : '[' assignment-expression-list ']'
;
assignment-expression-list : assignment-expression (',' assignment-expression-list)?
| /* empty */
;
dictionary-literal : '{' key-value-list '}'
;
key-value-list : key-value-pair (',' key-value-list)?
| /* empty */
;
key-value-pair : assignment-expression ':' assignment-expression
;
Note: ``@true`` and ``@false`` are only supported in Objective-C++.
Availability Checks
===================
Programs test for the new features by using clang's \_\_has\_feature
checks. Here are examples of their use:
.. code-block:: objc
#if __has_feature(objc_array_literals)
// new way.
NSArray *elements = @[ @"H", @"He", @"O", @"C" ];
#else
// old way (equivalent).
id objects[] = { @"H", @"He", @"O", @"C" };
NSArray *elements = [NSArray arrayWithObjects:objects count:4];
#endif
#if __has_feature(objc_dictionary_literals)
// new way.
NSDictionary *masses = @{ @"H" : @1.0078, @"He" : @4.0026, @"O" : @15.9990, @"C" : @12.0096 };
#else
// old way (equivalent).
id keys[] = { @"H", @"He", @"O", @"C" };
id values[] = { [NSNumber numberWithDouble:1.0078], [NSNumber numberWithDouble:4.0026],
[NSNumber numberWithDouble:15.9990], [NSNumber numberWithDouble:12.0096] };
NSDictionary *masses = [NSDictionary dictionaryWithObjects:values forKeys:keys count:4];
#endif
#if __has_feature(objc_subscripting)
NSUInteger i, count = elements.count;
for (i = 0; i < count; ++i) {
NSString *element = elements[i];
NSNumber *mass = masses[element];
NSLog(@"the mass of %@ is %@", element, mass);
}
#else
NSUInteger i, count = [elements count];
for (i = 0; i < count; ++i) {
NSString *element = [elements objectAtIndex:i];
NSNumber *mass = [masses objectForKey:element];
NSLog(@"the mass of %@ is %@", element, mass);
}
#endif
Code can also use ``__has_feature(objc_bool)`` to check for the
availability of numeric literals support. This checks for the new
``__objc_yes / __objc_no`` keywords, which enable the use of
``@YES / @NO`` literals.
To check whether boxed expressions are supported, use the
``__has_feature(objc_boxed_expressions)`` feature macro.

View File

@ -1,658 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Precompiled Header and Modules Internals</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Precompiled Header and Modules Internals</h1>
<p>This document describes the design and implementation of Clang's
precompiled headers (PCH) and modules. If you are interested in the end-user
view, please see the <a
href="UsersManual.html#precompiledheaders">User's Manual</a>.</p>
<p><b>Table of Contents</b></p>
<ul>
<li><a href="#usage">Using Precompiled Headers with
<tt>clang</tt></a></li>
<li><a href="#philosophy">Design Philosophy</a></li>
<li><a href="#contents">Serialized AST File Contents</a>
<ul>
<li><a href="#metadata">Metadata Block</a></li>
<li><a href="#sourcemgr">Source Manager Block</a></li>
<li><a href="#preprocessor">Preprocessor Block</a></li>
<li><a href="#types">Types Block</a></li>
<li><a href="#decls">Declarations Block</a></li>
<li><a href="#stmt">Statements and Expressions</a></li>
<li><a href="#idtable">Identifier Table Block</a></li>
<li><a href="#method-pool">Method Pool Block</a></li>
</ul>
</li>
<li><a href="#tendrils">AST Reader Integration Points</a></li>
<li><a href="#chained">Chained precompiled headers</a></li>
<li><a href="#modules">Modules</a></li>
</ul>
<h2 id="usage">Using Precompiled Headers with <tt>clang</tt></h2>
<p>The Clang compiler frontend, <tt>clang -cc1</tt>, supports two command line
options for generating and using PCH files.</p>
<p>To generate PCH files using <tt>clang -cc1</tt>, use the option
<b><tt>-emit-pch</tt></b>:</p>
<pre> $ clang -cc1 test.h -emit-pch -o test.h.pch </pre>
<p>This option is transparently used by <tt>clang</tt> when generating
PCH files. The resulting PCH file contains the serialized form of the
compiler's internal representation after it has completed parsing and
semantic analysis. The PCH file can then be used as a prefix header
with the <b><tt>-include-pch</tt></b> option:</p>
<pre>
$ clang -cc1 -include-pch test.h.pch test.c -o test.s
</pre>
<h2 id="philosophy">Design Philosophy</h2>
<p>Precompiled headers are meant to improve overall compile times for
projects, so the design of precompiled headers is entirely driven by
performance concerns. The use case for precompiled headers is
relatively simple: when there is a common set of headers that is
included in nearly every source file in the project, we
<i>precompile</i> that bundle of headers into a single precompiled
header (PCH file). Then, when compiling the source files in the
project, we load the PCH file first (as a prefix header), which acts
as a stand-in for that bundle of headers.</p>
<p>A precompiled header implementation improves performance when:</p>
<ul>
<li>Loading the PCH file is significantly faster than re-parsing the
bundle of headers stored within the PCH file. Thus, a precompiled
header design attempts to minimize the cost of reading the PCH
file. Ideally, this cost should not vary with the size of the
precompiled header file.</li>
<li>The cost of generating the PCH file initially is not so large
that it counters the per-source-file performance improvement due to
eliminating the need to parse the bundled headers in the first
place. This is particularly important on multi-core systems, because
PCH file generation serializes the build when all compilations
require the PCH file to be up-to-date.</li>
</ul>
<p>Modules, as implemented in Clang, use the same mechanisms as
precompiled headers to save a serialized AST file (one per module) and
use those AST modules. From an implementation standpoint, modules are
a generalization of precompiled headers, lifting a number of
restrictions placed on precompiled headers. In particular, there can
only be one precompiled header and it must be included at the
beginning of the translation unit. The extensions to the AST file
format required for modules are discussed in the section on <a href="#modules">modules</a>.</p>
<p>Clang's AST files are designed with a compact on-disk
representation, which minimizes both creation time and the time
required to initially load the AST file. The AST file itself contains
a serialized representation of Clang's abstract syntax trees and
supporting data structures, stored using the same compressed bitstream
as <a href="http://llvm.org/docs/BitCodeFormat.html">LLVM's bitcode
file format</a>.</p>
<p>Clang's AST files are loaded "lazily" from disk. When an
AST file is initially loaded, Clang reads only a small amount of data
from the AST file to establish where certain important data structures
are stored. The amount of data read in this initial load is
independent of the size of the AST file, such that a larger AST file
does not lead to longer AST load times. The actual header data in the
AST file--macros, functions, variables, types, etc.--is loaded only
when it is referenced from the user's code, at which point only that
entity (and those entities it depends on) are deserialized from the
AST file. With this approach, the cost of using an AST file
for a translation unit is proportional to the amount of code actually
used from the AST file, rather than being proportional to the size of
the AST file itself.</p>
<p>When given the <code>-print-stats</code> option, Clang produces
statistics describing how much of the AST file was actually
loaded from disk. For a simple "Hello, World!" program that includes
the Apple <code>Cocoa.h</code> header (which is built as a precompiled
header), this option illustrates how little of the actual precompiled
header is required:</p>
<pre>
*** PCH Statistics:
933 stat cache hits
4 stat cache misses
895/39981 source location entries read (2.238563%)
19/15315 types read (0.124061%)
20/82685 declarations read (0.024188%)
154/58070 identifiers read (0.265197%)
0/7260 selectors read (0.000000%)
0/30842 statements read (0.000000%)
4/8400 macros read (0.047619%)
1/4995 lexical declcontexts read (0.020020%)
0/4413 visible declcontexts read (0.000000%)
0/7230 method pool entries read (0.000000%)
0 method pool misses
</pre>
<p>For this small program, only a tiny fraction of the source
locations, types, declarations, identifiers, and macros were actually
deserialized from the precompiled header. These statistics can be
useful to determine whether the AST file implementation can
be improved by making more of the implementation lazy.</p>
<p>Precompiled headers can be chained. When you create a PCH while
including an existing PCH, Clang can create the new PCH by referencing
the original file and only writing the new data to the new file. For
example, you could create a PCH out of all the headers that are very
commonly used throughout your project, and then create a PCH for every
single source file in the project that includes the code that is
specific to that file, so that recompiling the file itself is very fast,
without duplicating the data from the common headers for every
file. The mechanisms behind chained precompiled headers are discussed
in a <a href="#chained">later section</a>.</p>
<h2 id="contents">AST File Contents</h2>
<img src="PCHLayout.png" style="float:right" alt="Precompiled header layout">
<p>Clang's AST files are organized into several different
blocks, each of which contains the serialized representation of a part
of Clang's internal representation. Each of the blocks corresponds to
either a block or a record within <a
href="http://llvm.org/docs/BitCodeFormat.html">LLVM's bitstream
format</a>. The contents of each of these logical blocks are described
below.</p>
<p>For a given AST file, the <a
href="http://llvm.org/cmds/llvm-bcanalyzer.html"><code>llvm-bcanalyzer</code></a>
utility can be used to examine the actual structure of the bitstream
for the AST file. This information can be used both to help
understand the structure of the AST file and to isolate
areas where AST files can still be optimized, e.g., through
the introduction of abbreviations.</p>
<h3 id="metadata">Metadata Block</h3>
<p>The metadata block contains several records that provide
information about how the AST file was built. This metadata
is primarily used to validate the use of an AST file. For
example, a precompiled header built for a 32-bit x86 target cannot be used
when compiling for a 64-bit x86 target. The metadata block contains
information about:</p>
<dl>
<dt>Language options</dt>
<dd>Describes the particular language dialect used to compile the
AST file, including major options (e.g., Objective-C support) and more
minor options (e.g., support for "//" comments). The contents of this
record correspond to the <code>LangOptions</code> class.</dd>
<dt>Target architecture</dt>
<dd>The target triple that describes the architecture, platform, and
ABI for which the AST file was generated, e.g.,
<code>i386-apple-darwin9</code>.</dd>
<dt>AST version</dt>
<dd>The major and minor version numbers of the AST file
format. Changes in the minor version number should not affect backward
compatibility, while changes in the major version number imply that a
newer compiler cannot read an older precompiled header (and
vice-versa).</dd>
<dt>Original file name</dt>
<dd>The full path of the header that was used to generate the
AST file.</dd>
<dt>Predefines buffer</dt>
<dd>Although not explicitly stored as part of the metadata, the
predefines buffer is used in the validation of the AST file.
The predefines buffer itself contains code generated by the compiler
to initialize the preprocessor state according to the current target,
platform, and command-line options. For example, the predefines buffer
will contain "<code>#define __STDC__ 1</code>" when we are compiling C
without Microsoft extensions. The predefines buffer itself is stored
within the <a href="#sourcemgr">source manager block</a>, but its
contents are verified along with the rest of the metadata.</dd>
</dl>
<p>A chained PCH file (that is, one that references another PCH) and a
module (which may import other modules) have additional metadata
containing the list of all AST files that this AST file depends
on. Each of those files will be loaded along with this AST file.</p>
<p>For chained precompiled headers, the language options, target
architecture and predefines buffer data is taken from the end of the
chain, since they have to match anyway.</p>
<h3 id="sourcemgr">Source Manager Block</h3>
<p>The source manager block contains the serialized representation of
Clang's <a
href="InternalsManual.html#SourceLocation">SourceManager</a> class,
which handles the mapping from source locations (as represented in
Clang's abstract syntax tree) into actual column/line positions within
a source file or macro instantiation. The AST file's
representation of the source manager also includes information about
all of the headers that were (transitively) included when building the
AST file.</p>
<p>The bulk of the source manager block is dedicated to information
about the various files, buffers, and macro instantiations into which
a source location can refer. Each of these is referenced by a numeric
"file ID", which is a unique number (allocated starting at 1) stored
in the source location. Clang serializes the information for each kind
of file ID, along with an index that maps file IDs to the position
within the AST file where the information about that file ID is
stored. The data associated with a file ID is loaded only when
required by the front end, e.g., to emit a diagnostic that includes a
macro instantiation history inside the header itself.</p>
<p>The source manager block also contains information about all of the
headers that were included when building the AST file. This
includes information about the controlling macro for the header (e.g.,
when the preprocessor identified that the contents of the header
are dependent on a macro like <code>LLVM_CLANG_SOURCEMANAGER_H</code>)
along with a cached version of the results of the <code>stat()</code>
system calls performed when building the AST file. The
latter is particularly useful in reducing system time when searching
for include files.</p>
<h3 id="preprocessor">Preprocessor Block</h3>
<p>The preprocessor block contains the serialized representation of
the preprocessor. Specifically, it contains all of the macros that
have been defined by the end of the header used to build the
AST file, along with the token sequences that comprise each
macro. The macro definitions are only read from the AST file when the
name of the macro first occurs in the program. This lazy loading of
macro definitions is triggered by lookups into the <a
href="#idtable">identifier table</a>.</p>
<h3 id="types">Types Block</h3>
<p>The types block contains the serialized representation of all of
the types referenced in the translation unit. Each Clang type node
(<code>PointerType</code>, <code>FunctionProtoType</code>, etc.) has a
corresponding record type in the AST file. When types are deserialized
from the AST file, the data within the record is used to
reconstruct the appropriate type node using the AST context.</p>
<p>Each type has a unique type ID, which is an integer that uniquely
identifies that type. Type ID 0 represents the NULL type, type IDs
less than <code>NUM_PREDEF_TYPE_IDS</code> represent predefined types
(<code>void</code>, <code>float</code>, etc.), while other
"user-defined" type IDs are assigned consecutively from
<code>NUM_PREDEF_TYPE_IDS</code> upward as the types are encountered.
The AST file has an associated mapping from each user-defined type
ID to the location within the types block where the serialized
representation of that type resides, enabling lazy deserialization of
types. When a type is referenced from within the AST file, that
reference is encoded using the type ID shifted left by 3 bits. The
lower three bits are used to represent the <code>const</code>,
<code>volatile</code>, and <code>restrict</code> qualifiers, as in
Clang's <a
href="http://clang.llvm.org/docs/InternalsManual.html#Type">QualType</a>
class.</p>
<h3 id="decls">Declarations Block</h3>
<p>The declarations block contains the serialized representation of
all of the declarations referenced in the translation unit. Each Clang
declaration node (<code>VarDecl</code>, <code>FunctionDecl</code>,
etc.) has a corresponding record type in the AST file. When
declarations are deserialized from the AST file, the data
within the record is used to build and populate a new instance of the
corresponding <code>Decl</code> node. As with types, each declaration
node has a numeric ID that is used to refer to that declaration within
the AST file. In addition, a lookup table provides a mapping from that
numeric ID to the offset within the precompiled header where that
declaration is described.</p>
<p>Declarations in Clang's abstract syntax trees are stored
hierarchically. At the top of the hierarchy is the translation unit
(<code>TranslationUnitDecl</code>), which contains all of the
declarations in the translation unit but is not actually written as a
specific declaration node. Its child declarations (such as
functions or struct types) may also contain other declarations inside
them, and so on. Within Clang, each declaration is stored within a <a
href="http://clang.llvm.org/docs/InternalsManual.html#DeclContext">declaration
context</a>, as represented by the <code>DeclContext</code> class.
Declaration contexts provide the mechanism to perform name lookup
within a given declaration (e.g., find the member named <code>x</code>
in a structure) and iterate over the declarations stored within a
context (e.g., iterate over all of the fields of a structure for
structure layout).</p>
<p>In Clang's AST file format, deserializing a declaration
that is a <code>DeclContext</code> is a separate operation from
deserializing all of the declarations stored within that declaration
context. Therefore, Clang will deserialize the translation unit
declaration without deserializing the declarations within that
translation unit. When required, the declarations stored within a
declaration context will be deserialized. There are two representations
of the declarations within a declaration context, which correspond to
the name-lookup and iteration behavior described above:</p>
<ul>
<li>When the front end performs name lookup to find a name
<code>x</code> within a given declaration context (for example,
during semantic analysis of the expression <code>p-&gt;x</code>,
where <code>p</code>'s type is defined in the precompiled header),
Clang refers to an on-disk hash table that maps from the names
within that declaration context to the declaration IDs that
represent each visible declaration with that name. The actual
declarations will then be deserialized to provide the results of
name lookup.</li>
<li>When the front end performs iteration over all of the
declarations within a declaration context, all of those declarations
are immediately de-serialized. For large declaration contexts (e.g.,
the translation unit), this operation is expensive; however, large
declaration contexts are not traversed in normal compilation, since
such a traversal is unnecessary. However, it is common for the code
generator and semantic analysis to traverse declaration contexts for
structs, classes, unions, and enumerations, although those contexts
contain relatively few declarations in the common case.</li>
</ul>
<h3 id="stmt">Statements and Expressions</h3>
<p>Statements and expressions are stored in the AST file in
both the <a href="#types">types</a> and the <a
href="#decls">declarations</a> blocks, because every statement or
expression will be associated with either a type or declaration. The
actual statement and expression records are stored immediately
following the declaration or type that owns the statement or
expression. For example, the statement representing the body of a
function will be stored directly following the declaration of the
function.</p>
<p>As with types and declarations, each statement and expression kind
in Clang's abstract syntax tree (<code>ForStmt</code>,
<code>CallExpr</code>, etc.) has a corresponding record type in the
AST file, which contains the serialized representation of
that statement or expression. Each substatement or subexpression
within an expression is stored as a separate record (which keeps most
records to a fixed size). Within the AST file, the
subexpressions of an expression are stored, in reverse order, prior to the expression
that owns those expressions, using a form of <a
href="http://en.wikipedia.org/wiki/Reverse_Polish_notation">Reverse
Polish Notation</a>. For example, an expression <code>3 - 4 + 5</code>
would be represented as follows:</p>
<table border="1">
<tr><td><code>IntegerLiteral(5)</code></td></tr>
<tr><td><code>IntegerLiteral(4)</code></td></tr>
<tr><td><code>IntegerLiteral(3)</code></td></tr>
<tr><td><code>BinaryOperator(-)</code></td></tr>
<tr><td><code>BinaryOperator(+)</code></td></tr>
<tr><td>STOP</td></tr>
</table>
<p>When reading this representation, Clang evaluates each expression
record it encounters, builds the appropriate abstract syntax tree node,
and then pushes that expression on to a stack. When a record contains <i>N</i>
subexpressions--<code>BinaryOperator</code> has two of them--those
expressions are popped from the top of the stack. The special STOP
code indicates that we have reached the end of a serialized expression
or statement; other expression or statement records may follow, but
they are part of a different expression.</p>
<h3 id="idtable">Identifier Table Block</h3>
<p>The identifier table block contains an on-disk hash table that maps
each identifier mentioned within the AST file to the
serialized representation of the identifier's information (e.g., the
<code>IdentifierInfo</code> structure). The serialized representation
contains:</p>
<ul>
<li>The actual identifier string.</li>
<li>Flags that describe whether this identifier is the name of a
built-in, a poisoned identifier, an extension token, or a
macro.</li>
<li>If the identifier names a macro, the offset of the macro
definition within the <a href="#preprocessor">preprocessor
block</a>.</li>
<li>If the identifier names one or more declarations visible from
translation unit scope, the <a href="#decls">declaration IDs</a> of these
declarations.</li>
</ul>
<p>When an AST file is loaded, the AST file reader
mechanism introduces itself into the identifier table as an external
lookup source. Thus, when the user program refers to an identifier
that has not yet been seen, Clang will perform a lookup into the
identifier table. If an identifier is found, its contents (macro
definitions, flags, top-level declarations, etc.) will be
deserialized, at which point the corresponding
<code>IdentifierInfo</code> structure will have the same contents it
would have after parsing the headers in the AST file.</p>
<p>Within the AST file, the identifiers used to name declarations are represented with an integral value. A separate table provides a mapping from this integral value (the identifier ID) to the location within the on-disk
hash table where that identifier is stored. This mapping is used when
deserializing the name of a declaration, the identifier of a token, or
any other construct in the AST file that refers to a name.</p>
<h3 id="method-pool">Method Pool Block</h3>
<p>The method pool block is represented as an on-disk hash table that
serves two purposes: it provides a mapping from the names of
Objective-C selectors to the set of Objective-C instance and class
methods that have that particular selector (which is required for
semantic analysis in Objective-C) and also stores all of the selectors
used by entities within the AST file. The design of the
method pool is similar to that of the <a href="#idtable">identifier
table</a>: the first time a particular selector is formed during the
compilation of the program, Clang will search in the on-disk hash
table of selectors; if found, Clang will read the Objective-C methods
associated with that selector into the appropriate front-end data
structure (<code>Sema::InstanceMethodPool</code> and
<code>Sema::FactoryMethodPool</code> for instance and class methods,
respectively).</p>
<p>As with identifiers, selectors are represented by numeric values
within the AST file. A separate index maps these numeric selector
values to the offset of the selector within the on-disk hash table,
and will be used when de-serializing an Objective-C method declaration
(or other Objective-C construct) that refers to the selector.</p>
<h2 id="tendrils">AST Reader Integration Points</h2>
<p>The "lazy" deserialization behavior of AST files requires
their integration into several completely different submodules of
Clang. For example, lazily deserializing the declarations during name
lookup requires that the name-lookup routines be able to query the
AST file to find entities stored there.</p>
<p>For each Clang data structure that requires direct interaction with
the AST reader logic, there is an abstract class that provides
the interface between the two modules. The <code>ASTReader</code>
class, which handles the loading of an AST file, inherits
from all of these abstract classes to provide lazy deserialization of
Clang's data structures. <code>ASTReader</code> implements the
following abstract classes:</p>
<dl>
<dt><code>StatSysCallCache</code></dt>
<dd>This abstract interface is associated with the
<code>FileManager</code> class, and is used whenever the file
manager is going to perform a <code>stat()</code> system call.</dd>
<dt><code>ExternalSLocEntrySource</code></dt>
<dd>This abstract interface is associated with the
<code>SourceManager</code> class, and is used whenever the
<a href="#sourcemgr">source manager</a> needs to load the details
of a file, buffer, or macro instantiation.</dd>
<dt><code>IdentifierInfoLookup</code></dt>
<dd>This abstract interface is associated with the
<code>IdentifierTable</code> class, and is used whenever the
program source refers to an identifier that has not yet been seen.
In this case, the AST reader searches for
this identifier within its <a href="#idtable">identifier table</a>
to load any top-level declarations or macros associated with that
identifier.</dd>
<dt><code>ExternalASTSource</code></dt>
<dd>This abstract interface is associated with the
<code>ASTContext</code> class, and is used whenever the abstract
syntax tree nodes need to be loaded from the AST file. It
provides the ability to de-serialize declarations and types
identified by their numeric values, read the bodies of functions
when required, and read the declarations stored within a
declaration context (either for iteration or for name lookup).</dd>
<dt><code>ExternalSemaSource</code></dt>
<dd>This abstract interface is associated with the <code>Sema</code>
class, and is used whenever semantic analysis needs to read
information from the <a href="#method-pool">global method
pool</a>.</dd>
</dl>
<h2 id="chained">Chained precompiled headers</h2>
<p>Chained precompiled headers were initially intended to improve the
performance of IDE-centric operations such as syntax highlighting and
code completion while a particular source file is being edited by the
user. To minimize the amount of reparsing required after a change to
the file, a form of precompiled header--called a precompiled
<i>preamble</i>--is automatically generated by parsing all of the
headers in the source file, up to and including the last
#include. When only the source file changes (and none of the headers
it depends on), reparsing of that source file can use the precompiled
preamble and start parsing after the #includes, so parsing time is
proportional to the size of the source file (rather than all of its
includes). However, the compilation of that translation unit
may already use a precompiled header: in this case, Clang will create
the precompiled preamble as a chained precompiled header that refers
to the original precompiled header. This drastically reduces the time
needed to serialize the precompiled preamble for use in reparsing.</p>
<p>Chained precompiled headers get their name because each precompiled header
can depend on one other precompiled header, forming a chain of
dependencies. A translation unit will then include the precompiled
header that starts the chain (i.e., nothing depends on it). This
linearity of dependencies is important for the semantic model of
chained precompiled headers, because the most-recent precompiled
header can provide information that overrides the information provided
by the precompiled headers it depends on, just like a header file
<code>B.h</code> that includes another header <code>A.h</code> can
modify the state produced by parsing <code>A.h</code>, e.g., by
<code>#undef</code>'ing a macro defined in <code>A.h</code>.</p>
<p>There are several ways in which chained precompiled headers
generalize the AST file model:</p>
<dl>
<dt>Numbering of IDs</dt>
<dd>Many different kinds of entities--identifiers, declarations,
types, etc.--have ID numbers that start at 1 or some other
predefined constant and grow upward. Each precompiled header records
the maximum ID number it has assigned in each category. Then, when a
new precompiled header is generated that depends on (chains to)
another precompiled header, it will start counting at the next
available ID number. This way, one can determine, given an ID
number, which AST file actually contains the entity.</dd>
<dt>Name lookup</dt>
<dd>When writing a chained precompiled header, Clang attempts to
write only information that has changed from the precompiled header
on which it is based. This changes the lookup algorithm for the
various tables, such as the <a href="#idtable">identifier table</a>:
the search starts at the most-recent precompiled header. If no entry
is found, lookup then proceeds to the identifier table in the
precompiled header it depends on, and so on. Once a lookup
succeeds, that result is considered definitive, overriding any
results from earlier precompiled headers.</dd>
<dt>Update records</dt>
<dd>There are various ways in which a later precompiled header can
modify the entities described in an earlier precompiled header. For
example, later precompiled headers can add entries into the various
name-lookup tables for the translation unit or namespaces, or add
new categories to an Objective-C class. Each of these updates is
captured in an "update record" that is stored in the chained
precompiled header file and will be loaded along with the original
entity.</dd>
</dl>
<h2 id="modules">Modules</h2>
<p>Modules generalize the chained precompiled header model yet
further, from a linear chain of precompiled headers to an arbitrary
directed acyclic graph (DAG) of AST files. All of the same techniques
used to make chained precompiled headers work---ID numbering, name
lookup, update records---are shared with modules. However, the DAG
nature of modules introduces a number of additional complications to
the model:</p>
<dl>
<dt>Numbering of IDs</dt>
<dd>The simple, linear numbering scheme used in chained precompiled
headers falls apart with the module DAG, because different modules
may end up with different numbering schemes for entities they
imported from common shared modules. To account for this, each
module file provides information about which modules it depends on
and which ID numbers it assigned to the entities in those modules,
as well as which ID numbers it took for its own new entities. The
AST reader then maps these "local" ID numbers into a "global" ID
number space for the current translation unit, providing a 1-1
mapping between entities (in whatever AST file they inhabit) and
global ID numbers. If that translation unit is then serialized into
an AST file, this mapping will be stored for use when the AST file
is imported.</dd>
<dt>Declaration merging</dt>
<dd>It is possible for a given entity (from the language's
perspective) to be declared multiple times in different places. For
example, two different headers can have the declaration of
<tt>printf</tt> or could forward-declare <tt>struct stat</tt>. If
each of those headers is included in a module, and some third party
imports both of those modules, there is a potentially serious
problem: name lookup for <tt>printf</tt> or <tt>struct stat</tt> will
find both declarations, but the AST nodes are unrelated. This would
result in a compilation error, due to an ambiguity in name
lookup. Therefore, the AST reader performs declaration merging
according to the appropriate language semantics, ensuring that the
two disjoint declarations are merged into a single redeclaration
chain (with a common canonical declaration), so that it is as if one
of the headers had been included before the other.</dd>
<dt>Name Visibility</dt>
<dd>Modules allow certain names that occur during module creation to
be "hidden", so that they are not part of the public interface of
the module and are not visible to its clients. The AST reader
maintains a "visible" bit on various AST nodes (declarations, macros,
etc.) to indicate whether that particular AST node is currently
visible; the various name lookup mechanisms in Clang inspect the
visible bit to determine whether that entity, which is still in the
AST (because other, visible AST nodes may depend on it), can
actually be found by name lookup. When a new (sub)module is
imported, it may make existing, non-visible, already-deserialized
AST nodes visible; it is the responsibility of the AST reader to
find and update these AST nodes when it is notified of the import.</dd>
</dl>
</div>
</body>
</html>

561
docs/PCHInternals.rst Normal file
View File

@ -0,0 +1,561 @@
========================================
Precompiled Header and Modules Internals
========================================
.. contents::
:local:
This document describes the design and implementation of Clang's precompiled
headers (PCH) and modules. If you are interested in the end-user view, please
see the :ref:`User's Manual <usersmanual-precompiled-headers>`.
Using Precompiled Headers with ``clang``
----------------------------------------
The Clang compiler frontend, ``clang -cc1``, supports two command line options
for generating and using PCH files.
To generate PCH files using ``clang -cc1``, use the option :option:`-emit-pch`:
.. code-block:: bash
$ clang -cc1 test.h -emit-pch -o test.h.pch
This option is transparently used by ``clang`` when generating PCH files. The
resulting PCH file contains the serialized form of the compiler's internal
representation after it has completed parsing and semantic analysis. The PCH
file can then be used as a prefix header with the :option:`-include-pch`
option:
.. code-block:: bash
$ clang -cc1 -include-pch test.h.pch test.c -o test.s
Design Philosophy
-----------------
Precompiled headers are meant to improve overall compile times for projects, so
the design of precompiled headers is entirely driven by performance concerns.
The use case for precompiled headers is relatively simple: when there is a
common set of headers that is included in nearly every source file in the
project, we *precompile* that bundle of headers into a single precompiled
header (PCH file). Then, when compiling the source files in the project, we
load the PCH file first (as a prefix header), which acts as a stand-in for that
bundle of headers.
A precompiled header implementation improves performance when:
* Loading the PCH file is significantly faster than re-parsing the bundle of
headers stored within the PCH file. Thus, a precompiled header design
attempts to minimize the cost of reading the PCH file. Ideally, this cost
should not vary with the size of the precompiled header file.
* The cost of generating the PCH file initially is not so large that it
counters the per-source-file performance improvement due to eliminating the
need to parse the bundled headers in the first place. This is particularly
important on multi-core systems, because PCH file generation serializes the
build when all compilations require the PCH file to be up-to-date.
Modules, as implemented in Clang, use the same mechanisms as precompiled
headers to save a serialized AST file (one per module) and use those AST
modules. From an implementation standpoint, modules are a generalization of
precompiled headers, lifting a number of restrictions placed on precompiled
headers. In particular, there can only be one precompiled header and it must
be included at the beginning of the translation unit. The extensions to the
AST file format required for modules are discussed in the section on
:ref:`modules <pchinternals-modules>`.
Clang's AST files are designed with a compact on-disk representation, which
minimizes both creation time and the time required to initially load the AST
file. The AST file itself contains a serialized representation of Clang's
abstract syntax trees and supporting data structures, stored using the same
compressed bitstream as `LLVM's bitcode file format
<http://llvm.org/docs/BitCodeFormat.html>`_.
Clang's AST files are loaded "lazily" from disk. When an AST file is initially
loaded, Clang reads only a small amount of data from the AST file to establish
where certain important data structures are stored. The amount of data read in
this initial load is independent of the size of the AST file, such that a
larger AST file does not lead to longer AST load times. The actual header data
in the AST file --- macros, functions, variables, types, etc. --- is loaded
only when it is referenced from the user's code, at which point only that
entity (and those entities it depends on) are deserialized from the AST file.
With this approach, the cost of using an AST file for a translation unit is
proportional to the amount of code actually used from the AST file, rather than
being proportional to the size of the AST file itself.
When given the :option:`-print-stats` option, Clang produces statistics
describing how much of the AST file was actually loaded from disk. For a
simple "Hello, World!" program that includes the Apple ``Cocoa.h`` header
(which is built as a precompiled header), this option illustrates how little of
the actual precompiled header is required:
.. code-block:: none
*** AST File Statistics:
895/39981 source location entries read (2.238563%)
19/15315 types read (0.124061%)
20/82685 declarations read (0.024188%)
154/58070 identifiers read (0.265197%)
0/7260 selectors read (0.000000%)
0/30842 statements read (0.000000%)
4/8400 macros read (0.047619%)
1/4995 lexical declcontexts read (0.020020%)
0/4413 visible declcontexts read (0.000000%)
0/7230 method pool entries read (0.000000%)
0 method pool misses
For this small program, only a tiny fraction of the source locations, types,
declarations, identifiers, and macros were actually deserialized from the
precompiled header. These statistics can be useful to determine whether the
AST file implementation can be improved by making more of the implementation
lazy.
Precompiled headers can be chained. When you create a PCH while including an
existing PCH, Clang can create the new PCH by referencing the original file and
only writing the new data to the new file. For example, you could create a PCH
out of all the headers that are very commonly used throughout your project, and
then create a PCH for every single source file in the project that includes the
code that is specific to that file, so that recompiling the file itself is very
fast, without duplicating the data from the common headers for every file. The
mechanisms behind chained precompiled headers are discussed in a :ref:`later
section <pchinternals-chained>`.
AST File Contents
-----------------
Clang's AST files are organized into several different blocks, each of which
contains the serialized representation of a part of Clang's internal
representation. Each of the blocks corresponds to either a block or a record
within `LLVM's bitstream format <http://llvm.org/docs/BitCodeFormat.html>`_.
The contents of each of these logical blocks are described below.
.. image:: PCHLayout.png
For a given AST file, the `llvm-bcanalyzer
<http://llvm.org/docs/CommandGuide/llvm-bcanalyzer.html>`_ utility can be used
to examine the actual structure of the bitstream for the AST file. This
information can be used both to help understand the structure of the AST file
and to isolate areas where AST files can still be optimized, e.g., through the
introduction of abbreviations.
Metadata Block
^^^^^^^^^^^^^^
The metadata block contains several records that provide information about how
the AST file was built. This metadata is primarily used to validate the use of
an AST file. For example, a precompiled header built for a 32-bit x86 target
cannot be used when compiling for a 64-bit x86 target. The metadata block
contains information about:
Language options
Describes the particular language dialect used to compile the AST file,
including major options (e.g., Objective-C support) and more minor options
(e.g., support for "``//``" comments). The contents of this record correspond to
the ``LangOptions`` class.
Target architecture
The target triple that describes the architecture, platform, and ABI for
which the AST file was generated, e.g., ``i386-apple-darwin9``.
AST version
The major and minor version numbers of the AST file format. Changes in the
minor version number should not affect backward compatibility, while changes
in the major version number imply that a newer compiler cannot read an older
precompiled header (and vice-versa).
Original file name
The full path of the header that was used to generate the AST file.
Predefines buffer
Although not explicitly stored as part of the metadata, the predefines buffer
is used in the validation of the AST file. The predefines buffer itself
contains code generated by the compiler to initialize the preprocessor state
according to the current target, platform, and command-line options. For
example, the predefines buffer will contain "``#define __STDC__ 1``" when we
are compiling C without Microsoft extensions. The predefines buffer itself
is stored within the :ref:`pchinternals-sourcemgr`, but its contents are
verified along with the rest of the metadata.
A chained PCH file (that is, one that references another PCH) and a module
(which may import other modules) have additional metadata containing the list
of all AST files that this AST file depends on. Each of those files will be
loaded along with this AST file.
For chained precompiled headers, the language options, target architecture and
predefines buffer data are taken from the end of the chain, since they have to
match anyway.
.. _pchinternals-sourcemgr:
Source Manager Block
^^^^^^^^^^^^^^^^^^^^
The source manager block contains the serialized representation of Clang's
:ref:`SourceManager <SourceManager>` class, which handles the mapping from
source locations (as represented in Clang's abstract syntax tree) into actual
column/line positions within a source file or macro instantiation. The AST
file's representation of the source manager also includes information about all
of the headers that were (transitively) included when building the AST file.
The bulk of the source manager block is dedicated to information about the
various files, buffers, and macro instantiations into which a source location
can refer. Each of these is referenced by a numeric "file ID", which is a
unique number (allocated starting at 1) stored in the source location. Clang
serializes the information for each kind of file ID, along with an index that
maps file IDs to the position within the AST file where the information about
that file ID is stored. The data associated with a file ID is loaded only when
required by the front end, e.g., to emit a diagnostic that includes a macro
instantiation history inside the header itself.
The source manager block also contains information about all of the headers
that were included when building the AST file. This includes information about
the controlling macro for the header (e.g., when the preprocessor identified
that the contents of the header are dependent on a macro like
``LLVM_CLANG_SOURCEMANAGER_H``).
.. _pchinternals-preprocessor:
Preprocessor Block
^^^^^^^^^^^^^^^^^^
The preprocessor block contains the serialized representation of the
preprocessor. Specifically, it contains all of the macros that have been
defined by the end of the header used to build the AST file, along with the
token sequences that comprise each macro. The macro definitions are only read
from the AST file when the name of the macro first occurs in the program. This
lazy loading of macro definitions is triggered by lookups into the
:ref:`identifier table <pchinternals-ident-table>`.
.. _pchinternals-types:
Types Block
^^^^^^^^^^^
The types block contains the serialized representation of all of the types
referenced in the translation unit. Each Clang type node (``PointerType``,
``FunctionProtoType``, etc.) has a corresponding record type in the AST file.
When types are deserialized from the AST file, the data within the record is
used to reconstruct the appropriate type node using the AST context.
Each type has a unique type ID, which is an integer that uniquely identifies
that type. Type ID 0 represents the NULL type, type IDs less than
``NUM_PREDEF_TYPE_IDS`` represent predefined types (``void``, ``float``, etc.),
while other "user-defined" type IDs are assigned consecutively from
``NUM_PREDEF_TYPE_IDS`` upward as the types are encountered. The AST file has
an associated mapping from the user-defined type IDs to the location within
the types block where the serialized representation of that type resides,
enabling lazy deserialization of types. When a type is referenced from within
the AST file, that reference is encoded using the type ID shifted left by 3
bits. The lower three bits are used to represent the ``const``, ``volatile``,
and ``restrict`` qualifiers, as in Clang's :ref:`QualType <QualType>` class.
.. _pchinternals-decls:
Declarations Block
^^^^^^^^^^^^^^^^^^
The declarations block contains the serialized representation of all of the
declarations referenced in the translation unit. Each Clang declaration node
(``VarDecl``, ``FunctionDecl``, etc.) has a corresponding record type in the
AST file. When declarations are deserialized from the AST file, the data
within the record is used to build and populate a new instance of the
corresponding ``Decl`` node. As with types, each declaration node has a
numeric ID that is used to refer to that declaration within the AST file. In
addition, a lookup table provides a mapping from that numeric ID to the offset
within the precompiled header where that declaration is described.
Declarations in Clang's abstract syntax trees are stored hierarchically. At
the top of the hierarchy is the translation unit (``TranslationUnitDecl``),
which contains all of the declarations in the translation unit but is not
actually written as a specific declaration node. Its child declarations (such
as functions or struct types) may also contain other declarations inside them,
and so on. Within Clang, each declaration is stored within a :ref:`declaration
context <DeclContext>`, as represented by the ``DeclContext`` class.
Declaration contexts provide the mechanism to perform name lookup within a
given declaration (e.g., find the member named ``x`` in a structure) and
iterate over the declarations stored within a context (e.g., iterate over all
of the fields of a structure for structure layout).
In Clang's AST file format, deserializing a declaration that is a
``DeclContext`` is a separate operation from deserializing all of the
declarations stored within that declaration context. Therefore, Clang will
deserialize the translation unit declaration without deserializing the
declarations within that translation unit. When required, the declarations
stored within a declaration context will be deserialized. There are two
representations of the declarations within a declaration context, which
correspond to the name-lookup and iteration behavior described above:
* When the front end performs name lookup to find a name ``x`` within a given
declaration context (for example, during semantic analysis of the expression
``p->x``, where ``p``'s type is defined in the precompiled header), Clang
refers to an on-disk hash table that maps from the names within that
declaration context to the declaration IDs that represent each visible
declaration with that name. The actual declarations will then be
deserialized to provide the results of name lookup.
* When the front end performs iteration over all of the declarations within a
declaration context, all of those declarations are immediately
de-serialized. For large declaration contexts (e.g., the translation unit),
this operation is expensive; however, large declaration contexts are not
traversed in normal compilation, since such a traversal is unnecessary.
However, it is common for the code generator and semantic analysis to
traverse declaration contexts for structs, classes, unions, and
enumerations, although those contexts contain relatively few declarations in
the common case.
Statements and Expressions
^^^^^^^^^^^^^^^^^^^^^^^^^^
Statements and expressions are stored in the AST file in both the :ref:`types
<pchinternals-types>` and the :ref:`declarations <pchinternals-decls>` blocks,
because every statement or expression will be associated with either a type or
declaration. The actual statement and expression records are stored
immediately following the declaration or type that owns the statement or
expression. For example, the statement representing the body of a function
will be stored directly following the declaration of the function.
As with types and declarations, each statement and expression kind in Clang's
abstract syntax tree (``ForStmt``, ``CallExpr``, etc.) has a corresponding
record type in the AST file, which contains the serialized representation of
that statement or expression. Each substatement or subexpression within an
expression is stored as a separate record (which keeps most records to a fixed
size). Within the AST file, the subexpressions of an expression are stored, in
reverse order, prior to the expression that owns those expressions, using a form
of `Reverse Polish Notation
<http://en.wikipedia.org/wiki/Reverse_Polish_notation>`_. For example, an
expression ``3 - 4 + 5`` would be represented as follows:
+-----------------------+
| ``IntegerLiteral(5)`` |
+-----------------------+
| ``IntegerLiteral(4)`` |
+-----------------------+
| ``IntegerLiteral(3)`` |
+-----------------------+
| ``BinaryOperator(-)`` |
+-----------------------+
| ``BinaryOperator(+)`` |
+-----------------------+
| ``STOP`` |
+-----------------------+
When reading this representation, Clang evaluates each expression record it
encounters, builds the appropriate abstract syntax tree node, and then pushes
that expression on to a stack. When a record contains *N* subexpressions ---
``BinaryOperator`` has two of them --- those expressions are popped from the
top of the stack. The special STOP code indicates that we have reached the end
of a serialized expression or statement; other expression or statement records
may follow, but they are part of a different expression.
.. _pchinternals-ident-table:
Identifier Table Block
^^^^^^^^^^^^^^^^^^^^^^
The identifier table block contains an on-disk hash table that maps each
identifier mentioned within the AST file to the serialized representation of
the identifier's information (e.g., the ``IdentifierInfo`` structure). The
serialized representation contains:
* The actual identifier string.
* Flags that describe whether this identifier is the name of a built-in, a
poisoned identifier, an extension token, or a macro.
* If the identifier names a macro, the offset of the macro definition within
the :ref:`pchinternals-preprocessor`.
* If the identifier names one or more declarations visible from translation
unit scope, the :ref:`declaration IDs <pchinternals-decls>` of these
declarations.
When an AST file is loaded, the AST file reader mechanism introduces itself
into the identifier table as an external lookup source. Thus, when the user
program refers to an identifier that has not yet been seen, Clang will perform
a lookup into the identifier table. If an identifier is found, its contents
(macro definitions, flags, top-level declarations, etc.) will be deserialized,
at which point the corresponding ``IdentifierInfo`` structure will have the
same contents it would have after parsing the headers in the AST file.
Within the AST file, the identifiers used to name declarations are represented
with an integral value. A separate table provides a mapping from this integral
value (the identifier ID) to the location within the on-disk hash table where
that identifier is stored. This mapping is used when deserializing the name of
a declaration, the identifier of a token, or any other construct in the AST
file that refers to a name.
.. _pchinternals-method-pool:
Method Pool Block
^^^^^^^^^^^^^^^^^
The method pool block is represented as an on-disk hash table that serves two
purposes: it provides a mapping from the names of Objective-C selectors to the
set of Objective-C instance and class methods that have that particular
selector (which is required for semantic analysis in Objective-C) and also
stores all of the selectors used by entities within the AST file. The design
of the method pool is similar to that of the :ref:`identifier table
<pchinternals-ident-table>`: the first time a particular selector is formed
during the compilation of the program, Clang will search in the on-disk hash
table of selectors; if found, Clang will read the Objective-C methods
associated with that selector into the appropriate front-end data structure
(``Sema::InstanceMethodPool`` and ``Sema::FactoryMethodPool`` for instance and
class methods, respectively).
As with identifiers, selectors are represented by numeric values within the AST
file. A separate index maps these numeric selector values to the offset of the
selector within the on-disk hash table, and will be used when de-serializing an
Objective-C method declaration (or other Objective-C construct) that refers to
the selector.
AST Reader Integration Points
-----------------------------
The "lazy" deserialization behavior of AST files requires their integration
into several completely different submodules of Clang. For example, lazily
deserializing the declarations during name lookup requires that the name-lookup
routines be able to query the AST file to find entities stored there.
For each Clang data structure that requires direct interaction with the AST
reader logic, there is an abstract class that provides the interface between
the two modules. The ``ASTReader`` class, which handles the loading of an AST
file, inherits from all of these abstract classes to provide lazy
deserialization of Clang's data structures. ``ASTReader`` implements the
following abstract classes:
``ExternalSLocEntrySource``
This abstract interface is associated with the ``SourceManager`` class, and
is used whenever the :ref:`source manager <pchinternals-sourcemgr>` needs to
load the details of a file, buffer, or macro instantiation.
``IdentifierInfoLookup``
This abstract interface is associated with the ``IdentifierTable`` class, and
is used whenever the program source refers to an identifier that has not yet
been seen. In this case, the AST reader searches for this identifier within
its :ref:`identifier table <pchinternals-ident-table>` to load any top-level
declarations or macros associated with that identifier.
``ExternalASTSource``
This abstract interface is associated with the ``ASTContext`` class, and is
used whenever the abstract syntax tree nodes need to be loaded from the AST
file. It provides the ability to de-serialize declarations and types
identified by their numeric values, read the bodies of functions when
required, and read the declarations stored within a declaration context
(either for iteration or for name lookup).
``ExternalSemaSource``
This abstract interface is associated with the ``Sema`` class, and is used
whenever semantic analysis needs to read information from the :ref:`global
method pool <pchinternals-method-pool>`.
.. _pchinternals-chained:
Chained precompiled headers
---------------------------
Chained precompiled headers were initially intended to improve the performance
of IDE-centric operations such as syntax highlighting and code completion while
a particular source file is being edited by the user. To minimize the amount
of reparsing required after a change to the file, a form of precompiled header
--- called a precompiled *preamble* --- is automatically generated by parsing
all of the headers in the source file, up to and including the last
``#include``. When only the source file changes (and none of the headers it
depends on), reparsing of that source file can use the precompiled preamble and
start parsing after the ``#include``\ s, so parsing time is proportional to the
size of the source file (rather than all of its includes). However, the
compilation of that translation unit may already use a precompiled header: in
this case, Clang will create the precompiled preamble as a chained precompiled
header that refers to the original precompiled header. This drastically
reduces the time needed to serialize the precompiled preamble for use in
reparsing.
Chained precompiled headers get their name because each precompiled header can
depend on one other precompiled header, forming a chain of dependencies. A
translation unit will then include the precompiled header that starts the chain
(i.e., nothing depends on it). This linearity of dependencies is important for
the semantic model of chained precompiled headers, because the most-recent
precompiled header can provide information that overrides the information
provided by the precompiled headers it depends on, just like a header file
``B.h`` that includes another header ``A.h`` can modify the state produced by
parsing ``A.h``, e.g., by ``#undef``'ing a macro defined in ``A.h``.
There are several ways in which chained precompiled headers generalize the AST
file model:
Numbering of IDs
Many different kinds of entities --- identifiers, declarations, types, etc.
--- have ID numbers that start at 1 or some other predefined constant and
grow upward. Each precompiled header records the maximum ID number it has
assigned in each category. Then, when a new precompiled header is generated
that depends on (chains to) another precompiled header, it will start
counting at the next available ID number. This way, one can determine, given
an ID number, which AST file actually contains the entity.
Name lookup
When writing a chained precompiled header, Clang attempts to write only
information that has changed from the precompiled header on which it is
based. This changes the lookup algorithm for the various tables, such as the
:ref:`identifier table <pchinternals-ident-table>`: the search starts at the
most-recent precompiled header. If no entry is found, lookup then proceeds
to the identifier table in the precompiled header it depends on, and so on.
Once a lookup succeeds, that result is considered definitive, overriding any
results from earlier precompiled headers.
Update records
There are various ways in which a later precompiled header can modify the
entities described in an earlier precompiled header. For example, later
precompiled headers can add entries into the various name-lookup tables for
the translation unit or namespaces, or add new categories to an Objective-C
class. Each of these updates is captured in an "update record" that is
stored in the chained precompiled header file and will be loaded along with
the original entity.
.. _pchinternals-modules:
Modules
-------
Modules generalize the chained precompiled header model yet further, from a
linear chain of precompiled headers to an arbitrary directed acyclic graph
(DAG) of AST files. All of the same techniques used to make chained
precompiled headers work --- ID numbering, name lookup, update records --- are
shared with modules. However, the DAG nature of modules introduces a number of
additional complications to the model:
Numbering of IDs
The simple, linear numbering scheme used in chained precompiled headers falls
apart with the module DAG, because different modules may end up with
different numbering schemes for entities they imported from common shared
modules. To account for this, each module file provides information about
which modules it depends on and which ID numbers it assigned to the entities
in those modules, as well as which ID numbers it took for its own new
entities. The AST reader then maps these "local" ID numbers into a "global"
ID number space for the current translation unit, providing a 1-1 mapping
between entities (in whatever AST file they inhabit) and global ID numbers.
If that translation unit is then serialized into an AST file, this mapping
will be stored for use when the AST file is imported.
Declaration merging
It is possible for a given entity (from the language's perspective) to be
declared multiple times in different places. For example, two different
headers can have the declaration of ``printf`` or could forward-declare
``struct stat``. If each of those headers is included in a module, and some
third party imports both of those modules, there is a potentially serious
problem: name lookup for ``printf`` or ``struct stat`` will find both
declarations, but the AST nodes are unrelated. This would result in a
compilation error, due to an ambiguity in name lookup. Therefore, the AST
reader performs declaration merging according to the appropriate language
semantics, ensuring that the two disjoint declarations are merged into a
single redeclaration chain (with a common canonical declaration), so that it
is as if one of the headers had been included before the other.
Name Visibility
Modules allow certain names that occur during module creation to be "hidden",
so that they are not part of the public interface of the module and are not
visible to its clients. The AST reader maintains a "visible" bit on various
AST nodes (declarations, macros, etc.) to indicate whether that particular
AST node is currently visible; the various name lookup mechanisms in Clang
inspect the visible bit to determine whether that entity, which is still in
the AST (because other, visible AST nodes may depend on it), can actually be
found by name lookup. When a new (sub)module is imported, it may make
existing, non-visible, already-deserialized AST nodes visible; it is the
responsibility of the AST reader to find and update these AST nodes when it
is notified of the import.

View File

@ -1,179 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Pretokenized Headers (PTH)</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Pretokenized Headers (PTH)</h1>
<p>This document first describes the low-level
interface for using PTH and then briefly elaborates on its design and
implementation. If you are interested in the end-user view, please see the
<a href="UsersManual.html#precompiledheaders">User's Manual</a>.</p>
<h2>Using Pretokenized Headers with <tt>clang</tt> (Low-level Interface)</h2>
<p>The Clang compiler frontend, <tt>clang -cc1</tt>, supports three command line
options for generating and using PTH files.</p>
<p>To generate PTH files using <tt>clang -cc1</tt>, use the option
<b><tt>-emit-pth</tt></b>:
<pre> $ clang -cc1 test.h -emit-pth -o test.h.pth </pre>
<p>This option is transparently used by <tt>clang</tt> when generating PTH
files. Similarly, PTH files can be used as prefix headers using the
<b><tt>-include-pth</tt></b> option:</p>
<pre>
$ clang -cc1 -include-pth test.h.pth test.c -o test.s
</pre>
<p>Alternatively, Clang's PTH files can be used as a raw &quot;token-cache&quot;
(or &quot;content&quot; cache) of the source included by the original header
file. This means that the contents of the PTH file are searched as substitutes
for <em>any</em> source files that are used by <tt>clang -cc1</tt> to process a
source file. This is done by specifying the <b><tt>-token-cache</tt></b>
option:</p>
<pre>
$ cat test.h
#include &lt;stdio.h&gt;
$ clang -cc1 -emit-pth test.h -o test.h.pth
$ cat test.c
#include "test.h"
$ clang -cc1 test.c -o test -token-cache test.h.pth
</pre>
<p>In this example the contents of <tt>stdio.h</tt> (and the files it includes)
will be retrieved from <tt>test.h.pth</tt>, as the PTH file is being used in
this case as a raw cache of the contents of <tt>test.h</tt>. This is a low-level
interface used to both implement the high-level PTH interface as well as to
provide alternative means to use PTH-style caching.</p>
<h2>PTH Design and Implementation</h2>
<p>Unlike GCC's precompiled headers, which cache the full ASTs and preprocessor
state of a header file, Clang's pretokenized header files mainly cache the raw
lexer <em>tokens</em> that are needed to segment the stream of characters in a
source file into keywords, identifiers, and operators. Consequently, PTH serves
to mainly directly speed up the lexing and preprocessing of a source file, while
parsing and type-checking must be completely redone every time a PTH file is
used.</p>
<h3>Basic Design Tradeoffs</h3>
<p>In the long term there are plans to provide an alternate PCH implementation
for Clang that also caches the work for parsing and type checking the contents
of header files. The current implementation of PCH in Clang as pretokenized
header files was motivated by the following factors:</p>
<ul>
<li><p><b>Language independence</b>: PTH files work with any language that
Clang's lexer can handle, including C, Objective-C, and (in the early stages)
C++. This means development on language features at the parsing level or above
(which is basically almost all interesting pieces) does not require PTH to be
modified.</p></li>
<li><b>Simple design</b>: Relatively speaking, PTH has a simple design and
implementation, making it easy to test. Further, because the machinery for PTH
resides at the lower-levels of the Clang library stack it is fairly
straightforward to profile and optimize.</li>
</ul>
<p>Further, compared to GCC's PCH implementation (which is the dominant
precompiled header file implementation that Clang can be directly compared
against) the PTH design in Clang yields several attractive features:</p>
<ul>
<li><p><b>Architecture independence</b>: In contrast to GCC's PCH files (and
those of several other compilers), Clang's PTH files are architecture
independent, requiring only a single PTH file when building a program for
multiple architectures.</p>
<p>For example, on Mac OS X one may wish to
compile a &quot;universal binary&quot; that runs on PowerPC, 32-bit Intel
(i386), and 64-bit Intel architectures. In contrast, GCC requires a PCH file for
each architecture, as the definitions of types in the AST are
architecture-specific. Since a Clang PTH file essentially represents a lexical
cache of header files, a single PTH file can be safely used when compiling for
multiple architectures. This can also reduce compile times because only a single
PTH file needs to be generated during a build instead of several.</p></li>
<li><p><b>Reduced memory pressure</b>: Similar to GCC,
Clang reads PTH files via the use of memory mapping (i.e., <tt>mmap</tt>).
Clang, however, memory maps PTH files as read-only, meaning that multiple
invocations of <tt>clang -cc1</tt> can share the same pages in memory from a
memory-mapped PTH file. In comparison, GCC also memory maps its PCH files but
also modifies those pages in memory, incurring the copy-on-write costs. The
read-only nature of PTH can greatly reduce memory pressure for builds involving
multiple cores, thus improving overall scalability.</p></li>
<li><p><b>Fast generation</b>: PTH files can be generated in a small fraction
of the time needed to generate GCC's PCH files. Since PTH/PCH generation is a
serial operation that typically blocks progress during a build, faster
generation time leads to improved processor utilization with parallel builds on
multicore machines.</p></li>
</ul>
<p>Despite these strengths, PTH's simple design suffers some algorithmic
handicaps compared to other PCH strategies such as those used by GCC. While PTH
can greatly speed up the processing time of a header file, the amount of work
required to process a header file is still roughly linear in the size of the
header file. In contrast, the amount of work done by GCC to process a
precompiled header is (theoretically) constant (the ASTs for the header are
literally memory mapped into the compiler). This means that only the pieces of
the header file that are referenced by the source file including the header are
the only ones the compiler needs to process during actual compilation. While
GCC's particular implementation of PCH mitigates some of these algorithmic
strengths via the use of copy-on-write pages, the approach itself can
fundamentally dominate at an algorithmic level, especially when one considers
header files of arbitrary size.</p>
<p>There are plans to potentially implement a complementary PCH implementation
for Clang based on the lazy deserialization of ASTs. This approach would
theoretically have the same constant-time algorithmic advantages just mentioned
but would also retain some of the strengths of PTH such as reduced memory
pressure (ideal for multi-core builds).</p>
<h3>Internal PTH Optimizations</h3>
<p>While the main optimization employed by PTH is to reduce lexing time of
header files by caching pre-lexed tokens, PTH also employs several other
optimizations to speed up the processing of header files:</p>
<ul>
<li><p><em><tt>stat</tt> caching</em>: PTH files cache information obtained via
calls to <tt>stat</tt> that <tt>clang -cc1</tt> uses to resolve which files are
included by <tt>#include</tt> directives. This greatly reduces the overhead
involved in context-switching to the kernel to resolve included files.</p></li>
<li><p><em>Fast skipping of <tt>#ifdef</tt>...<tt>#endif</tt> chains</em>:
PTH files record the basic structure of nested preprocessor blocks. When the
condition of the preprocessor block is false, all of its tokens are immediately
skipped instead of requiring them to be handled by Clang's
preprocessor.</p></li>
</ul>
</div>
</body>
</html>

163
docs/PTHInternals.rst Normal file
View File

@ -0,0 +1,163 @@
==========================
Pretokenized Headers (PTH)
==========================
This document first describes the low-level interface for using PTH and
then briefly elaborates on its design and implementation. If you are
interested in the end-user view, please see the :ref:`User's Manual
<usersmanual-precompiled-headers>`.
Using Pretokenized Headers with ``clang`` (Low-level Interface)
===============================================================
The Clang compiler frontend, ``clang -cc1``, supports three command line
options for generating and using PTH files.
To generate PTH files using ``clang -cc1``, use the option ``-emit-pth``:
.. code-block:: console
$ clang -cc1 test.h -emit-pth -o test.h.pth
This option is transparently used by ``clang`` when generating PTH
files. Similarly, PTH files can be used as prefix headers using the
``-include-pth`` option:
.. code-block:: console
$ clang -cc1 -include-pth test.h.pth test.c -o test.s
Alternatively, Clang's PTH files can be used as a raw "token-cache" (or
"content" cache) of the source included by the original header file.
This means that the contents of the PTH file are searched as substitutes
for *any* source files that are used by ``clang -cc1`` to process a
source file. This is done by specifying the ``-token-cache`` option:
.. code-block:: console
$ cat test.h
#include <stdio.h>
$ clang -cc1 -emit-pth test.h -o test.h.pth
$ cat test.c
#include "test.h"
$ clang -cc1 test.c -o test -token-cache test.h.pth
In this example the contents of ``stdio.h`` (and the files it includes)
will be retrieved from ``test.h.pth``, as the PTH file is being used in
this case as a raw cache of the contents of ``test.h``. This is a
low-level interface used to both implement the high-level PTH interface
as well as to provide alternative means to use PTH-style caching.
PTH Design and Implementation
=============================
Unlike GCC's precompiled headers, which cache the full ASTs and
preprocessor state of a header file, Clang's pretokenized header files
mainly cache the raw lexer *tokens* that are needed to segment the
stream of characters in a source file into keywords, identifiers, and
operators. Consequently, PTH mainly serves to directly speed up the
lexing and preprocessing of a source file, while parsing and
type-checking must be completely redone every time a PTH file is used.
Basic Design Tradeoffs
----------------------
In the long term there are plans to provide an alternate PCH
implementation for Clang that also caches the work for parsing and type
checking the contents of header files. The current implementation of PCH
in Clang as pretokenized header files was motivated by the following
factors:
**Language independence**
PTH files work with any language that
Clang's lexer can handle, including C, Objective-C, and (in the early
stages) C++. This means development on language features at the
parsing level or above (which is basically almost all interesting
pieces) does not require PTH to be modified.
**Simple design**
Relatively speaking, PTH has a simple design and
implementation, making it easy to test. Further, because the
machinery for PTH resides at the lower-levels of the Clang library
stack it is fairly straightforward to profile and optimize.
Further, compared to GCC's PCH implementation (which is the dominant
precompiled header file implementation that Clang can be directly
compared against) the PTH design in Clang yields several attractive
features:
**Architecture independence**
In contrast to GCC's PCH files (and
those of several other compilers), Clang's PTH files are architecture
independent, requiring only a single PTH file when building a
program for multiple architectures.
For example, on Mac OS X one may wish to compile a "universal binary"
that runs on PowerPC, 32-bit Intel (i386), and 64-bit Intel
architectures. In contrast, GCC requires a PCH file for each
architecture, as the definitions of types in the AST are
architecture-specific. Since a Clang PTH file essentially represents
a lexical cache of header files, a single PTH file can be safely used
when compiling for multiple architectures. This can also reduce
compile times because only a single PTH file needs to be generated
during a build instead of several.
**Reduced memory pressure**
Similar to GCC, Clang reads PTH files
via the use of memory mapping (i.e., ``mmap``). Clang, however,
memory maps PTH files as read-only, meaning that multiple invocations
of ``clang -cc1`` can share the same pages in memory from a
memory-mapped PTH file. In comparison, GCC also memory maps its PCH
files but also modifies those pages in memory, incurring the
copy-on-write costs. The read-only nature of PTH can greatly reduce
memory pressure for builds involving multiple cores, thus improving
overall scalability.
**Fast generation**
PTH files can be generated in a small fraction
of the time needed to generate GCC's PCH files. Since PTH/PCH
generation is a serial operation that typically blocks progress
during a build, faster generation time leads to improved processor
utilization with parallel builds on multicore machines.
Despite these strengths, PTH's simple design suffers some algorithmic
handicaps compared to other PCH strategies such as those used by GCC.
While PTH can greatly speed up the processing time of a header file, the
amount of work required to process a header file is still roughly linear
in the size of the header file. In contrast, the amount of work done by
GCC to process a precompiled header is (theoretically) constant (the
ASTs for the header are literally memory mapped into the compiler). This
means that the pieces of the header file that are referenced by the
source file including the header are the only ones the compiler needs to
process during actual compilation. While GCC's particular implementation
of PCH mitigates some of these algorithmic strengths via the use of
copy-on-write pages, the approach itself can fundamentally dominate at
an algorithmic level, especially when one considers header files of
arbitrary size.
There are plans to potentially implement a complementary PCH
implementation for Clang based on the lazy deserialization of ASTs. This
approach would theoretically have the same constant-time algorithmic
advantages just mentioned but would also retain some of the strengths of
PTH such as reduced memory pressure (ideal for multi-core builds).
Internal PTH Optimizations
--------------------------
While the main optimization employed by PTH is to reduce lexing time of
header files by caching pre-lexed tokens, PTH also employs several other
optimizations to speed up the processing of header files:
- ``stat`` caching: PTH files cache information obtained via calls to
``stat`` that ``clang -cc1`` uses to resolve which files are included
by ``#include`` directives. This greatly reduces the overhead
involved in context-switching to the kernel to resolve included
files.
- Fast skipping of ``#ifdef`` ... ``#endif`` chains: PTH files
record the basic structure of nested preprocessor blocks. When the
condition of the preprocessor block is false, all of its tokens are
immediately skipped instead of requiring them to be handled by
Clang's preprocessor.

View File

@ -1,224 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>How to write RecursiveASTVisitor based ASTFrontendActions.</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>How to write RecursiveASTVisitor based ASTFrontendActions.</h1>
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
In this tutorial you will learn how to create a FrontendAction that uses
a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified name.
<!-- ======================================================================= -->
<h2 id="action">Creating a FrontendAction</h2>
<!-- ======================================================================= -->
<p>When writing a clang based tool like a Clang Plugin or a standalone tool
based on LibTooling, the common entry point is the FrontendAction.
FrontendAction is an interface that allows execution of user specific actions
as part of the compilation. To run tools over the AST clang provides the
convenience interface ASTFrontendAction, which takes care of executing the
action. The only part left is to implement the CreateASTConsumer method that
returns an ASTConsumer per translation unit.</p>
<pre>
class FindNamedClassAction : public clang::ASTFrontendAction {
public:
virtual clang::ASTConsumer *CreateASTConsumer(
clang::CompilerInstance &amp;Compiler, llvm::StringRef InFile) {
return new FindNamedClassConsumer;
}
};
</pre>
<!-- ======================================================================= -->
<h2 id="consumer">Creating an ASTConsumer</h2>
<!-- ======================================================================= -->
<p>ASTConsumer is an interface used to write generic actions on an AST,
regardless of how the AST was produced. ASTConsumer provides many different
entry points, but for our use case the only one needed is HandleTranslationUnit,
which is called with the ASTContext for the translation unit.</p>
<pre>
class FindNamedClassConsumer : public clang::ASTConsumer {
public:
virtual void HandleTranslationUnit(clang::ASTContext &amp;Context) {
// Traversing the translation unit decl via a RecursiveASTVisitor
// will visit all nodes in the AST.
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
// A RecursiveASTVisitor implementation.
FindNamedClassVisitor Visitor;
};
</pre>
<!-- ======================================================================= -->
<h2 id="rav">Using the RecursiveASTVisitor</h2>
<!-- ======================================================================= -->
<p>Now that everything is hooked up, the next step is to implement a
RecursiveASTVisitor to extract the relevant information from the AST.</p>
<p>The RecursiveASTVisitor provides hooks of the form
bool VisitNodeType(NodeType *) for most AST nodes; the exceptions are TypeLoc
nodes, which are passed by-value. We only need to implement the methods for the
relevant node types.
</p>
<p>Let's start by writing a RecursiveASTVisitor that visits all CXXRecordDecl's.
<pre>
class FindNamedClassVisitor
: public RecursiveASTVisitor&lt;FindNamedClassVisitor> {
public:
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
// For debugging, dumping the AST nodes will show which nodes are already
// being visited.
Declaration->dump();
// The return value indicates whether we want the visitation to proceed.
// Return false to stop the traversal of the AST.
return true;
}
};
</pre>
</p>
<p>In the methods of our RecursiveASTVisitor we can now use the full power of
the Clang AST to drill through to the parts that are interesting for us. For
example, to find all class declarations with a certain name, we can check for a
specific qualified name:
<pre>
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
if (Declaration->getQualifiedNameAsString() == "n::m::C")
Declaration->dump();
return true;
}
</pre>
</p>
<!-- ======================================================================= -->
<h2 id="context">Accessing the SourceManager and ASTContext</h2>
<!-- ======================================================================= -->
<p>Some of the information about the AST, like source locations and global
identifier information, are not stored in the AST nodes themselves, but in
the ASTContext and its associated source manager. To retrieve them we need to
hand the ASTContext into our RecursiveASTVisitor implementation.</p>
<p>The ASTContext is available from the CompilerInstance during the call
to CreateASTConsumer. We can thus extract it there and hand it into our
freshly created FindNamedClassConsumer:</p>
<pre>
virtual clang::ASTConsumer *CreateASTConsumer(
clang::CompilerInstance &amp;Compiler, llvm::StringRef InFile) {
return new FindNamedClassConsumer(<b>&amp;Compiler.getASTContext()</b>);
}
</pre>
<p>Now that the ASTContext is available in the RecursiveASTVisitor, we can do
more interesting things with AST nodes, like looking up their source
locations:</p>
<pre>
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
// getFullLoc uses the ASTContext's SourceManager to resolve the source
// location and break it up into its line and column parts.
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
if (FullLocation.isValid())
llvm::outs() &lt;&lt; "Found declaration at "
&lt;&lt; FullLocation.getSpellingLineNumber() &lt;&lt; ":"
&lt;&lt; FullLocation.getSpellingColumnNumber() &lt;&lt; "\n";
}
return true;
}
</pre>
<!-- ======================================================================= -->
<h2 id="full">Putting it all together</h2>
<!-- ======================================================================= -->
<p>Now we can combine all of the above into a small example program:</p>
<pre>
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Tooling/Tooling.h"
using namespace clang;
class FindNamedClassVisitor
: public RecursiveASTVisitor&lt;FindNamedClassVisitor> {
public:
explicit FindNamedClassVisitor(ASTContext *Context)
: Context(Context) {}
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
if (FullLocation.isValid())
llvm::outs() &lt;&lt; "Found declaration at "
&lt;&lt; FullLocation.getSpellingLineNumber() &lt;&lt; ":"
&lt;&lt; FullLocation.getSpellingColumnNumber() &lt;&lt; "\n";
}
return true;
}
private:
ASTContext *Context;
};
class FindNamedClassConsumer : public clang::ASTConsumer {
public:
explicit FindNamedClassConsumer(ASTContext *Context)
: Visitor(Context) {}
virtual void HandleTranslationUnit(clang::ASTContext &amp;Context) {
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
FindNamedClassVisitor Visitor;
};
class FindNamedClassAction : public clang::ASTFrontendAction {
public:
virtual clang::ASTConsumer *CreateASTConsumer(
clang::CompilerInstance &amp;Compiler, llvm::StringRef InFile) {
return new FindNamedClassConsumer(&amp;Compiler.getASTContext());
}
};
int main(int argc, char **argv) {
if (argc > 1) {
clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]);
}
}
</pre>
<p>We store this into a file called FindClassDecls.cpp and create the following
CMakeLists.txt to link it:</p>
<pre>
set(LLVM_USED_LIBS clangTooling)
add_clang_executable(find-class-decls FindClassDecls.cpp)
</pre>
<p>When running this tool over a small code snippet it will output all
declarations of a class n::m::C it found:</p>
<pre>
$ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }"
Found declaration at 1:29
</pre>
</div>
</body>
</html>

216
docs/RAVFrontendAction.rst Normal file
View File

@ -0,0 +1,216 @@
==========================================================
How to write RecursiveASTVisitor based ASTFrontendActions.
==========================================================
Introduction
============
In this tutorial you will learn how to create a FrontendAction that uses
a RecursiveASTVisitor to find CXXRecordDecl AST nodes with a specified
name.
Creating a FrontendAction
=========================
When writing a clang based tool like a Clang Plugin or a standalone tool
based on LibTooling, the common entry point is the FrontendAction.
FrontendAction is an interface that allows execution of user specific
actions as part of the compilation. To run tools over the AST clang
provides the convenience interface ASTFrontendAction, which takes care
of executing the action. The only part left is to implement the
CreateASTConsumer method that returns an ASTConsumer per translation
unit.
::
class FindNamedClassAction : public clang::ASTFrontendAction {
public:
virtual clang::ASTConsumer *CreateASTConsumer(
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
return new FindNamedClassConsumer;
}
};
Creating an ASTConsumer
=======================
ASTConsumer is an interface used to write generic actions on an AST,
regardless of how the AST was produced. ASTConsumer provides many
different entry points, but for our use case the only one needed is
HandleTranslationUnit, which is called with the ASTContext for the
translation unit.
::
class FindNamedClassConsumer : public clang::ASTConsumer {
public:
virtual void HandleTranslationUnit(clang::ASTContext &Context) {
// Traversing the translation unit decl via a RecursiveASTVisitor
// will visit all nodes in the AST.
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
// A RecursiveASTVisitor implementation.
FindNamedClassVisitor Visitor;
};
Using the RecursiveASTVisitor
=============================
Now that everything is hooked up, the next step is to implement a
RecursiveASTVisitor to extract the relevant information from the AST.
The RecursiveASTVisitor provides hooks of the form bool
VisitNodeType(NodeType \*) for most AST nodes; the exceptions are TypeLoc
nodes, which are passed by value. We only need to implement the methods
for the relevant node types.
Let's start by writing a RecursiveASTVisitor that visits all
CXXRecordDecl's.
::
class FindNamedClassVisitor
: public RecursiveASTVisitor<FindNamedClassVisitor> {
public:
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
// For debugging, dumping the AST nodes will show which nodes are already
// being visited.
Declaration->dump();
// The return value indicates whether we want the visitation to proceed.
// Return false to stop the traversal of the AST.
return true;
}
};
In the methods of our RecursiveASTVisitor we can now use the full power
of the Clang AST to drill through to the parts that are interesting for
us. For example, to find all class declarations with a certain name, we
can check for a specific qualified name:
::
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
if (Declaration->getQualifiedNameAsString() == "n::m::C")
Declaration->dump();
return true;
}
Accessing the SourceManager and ASTContext
==========================================
Some of the information about the AST, like source locations and global
identifier information, are not stored in the AST nodes themselves, but
in the ASTContext and its associated source manager. To retrieve them we
need to hand the ASTContext into our RecursiveASTVisitor implementation.
The ASTContext is available from the CompilerInstance during the call to
CreateASTConsumer. We can thus extract it there and hand it into our
freshly created FindNamedClassConsumer:
::
virtual clang::ASTConsumer *CreateASTConsumer(
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
return new FindNamedClassConsumer(&Compiler.getASTContext());
}
Now that the ASTContext is available in the RecursiveASTVisitor, we can
do more interesting things with AST nodes, like looking up their source
locations:
::
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
// getFullLoc uses the ASTContext's SourceManager to resolve the source
// location and break it up into its line and column parts.
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
if (FullLocation.isValid())
llvm::outs() << "Found declaration at "
<< FullLocation.getSpellingLineNumber() << ":"
<< FullLocation.getSpellingColumnNumber() << "\n";
}
return true;
}
Putting it all together
=======================
Now we can combine all of the above into a small example program:
::
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Tooling/Tooling.h"
using namespace clang;
class FindNamedClassVisitor
: public RecursiveASTVisitor<FindNamedClassVisitor> {
public:
explicit FindNamedClassVisitor(ASTContext *Context)
: Context(Context) {}
bool VisitCXXRecordDecl(CXXRecordDecl *Declaration) {
if (Declaration->getQualifiedNameAsString() == "n::m::C") {
FullSourceLoc FullLocation = Context->getFullLoc(Declaration->getLocStart());
if (FullLocation.isValid())
llvm::outs() << "Found declaration at "
<< FullLocation.getSpellingLineNumber() << ":"
<< FullLocation.getSpellingColumnNumber() << "\n";
}
return true;
}
private:
ASTContext *Context;
};
class FindNamedClassConsumer : public clang::ASTConsumer {
public:
explicit FindNamedClassConsumer(ASTContext *Context)
: Visitor(Context) {}
virtual void HandleTranslationUnit(clang::ASTContext &Context) {
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
FindNamedClassVisitor Visitor;
};
class FindNamedClassAction : public clang::ASTFrontendAction {
public:
virtual clang::ASTConsumer *CreateASTConsumer(
clang::CompilerInstance &Compiler, llvm::StringRef InFile) {
return new FindNamedClassConsumer(&Compiler.getASTContext());
}
};
int main(int argc, char **argv) {
if (argc > 1) {
clang::tooling::runToolOnCode(new FindNamedClassAction, argv[1]);
}
}
We store this into a file called FindClassDecls.cpp and create the
following CMakeLists.txt to link it:
::
set(LLVM_USED_LIBS clangTooling)
add_clang_executable(find-class-decls FindClassDecls.cpp)
When running this tool over a small code snippet it will output all
declarations of a class n::m::C it found:
::
$ ./bin/find-class-decls "namespace n { namespace m { class C {}; } }"
Found declaration at 1:29

1
docs/README.txt Normal file
View File

@ -0,0 +1 @@
See llvm/docs/README.txt

View File

@ -1,325 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Clang 3.2 Release Notes</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Clang 3.2 Release Notes</h1>
<img style="float:right" src="http://llvm.org/img/DragonSmall.png"
width="136" height="136" alt="LLVM Dragon Logo">
<ul>
<li><a href="#intro">Introduction</a></li>
<li><a href="#whatsnew">What's New in Clang 3.2?</a>
<ul>
<li><a href="#majorfeatures">Major New Features</a></li>
<li><a href="#newflags">New Compiler Flags</a></li>
<li><a href="#cchanges">C Language Changes</a></li>
<li><a href="#cxxchanges">C++ Language Changes</a></li>
<li><a href="#objcchanges">Objective-C Language Changes</a></li>
<li><a href="#apichanges">Internal API Changes</a></li>
<li><a href="#pythonchanges">Python Binding Changes</a></li>
</ul>
</li>
<li><a href="#knownproblems">Known Problems</a></li>
<li><a href="#additionalinfo">Additional Information</a></li>
</ul>
<div class="doc_author">
<p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
</div>
<!-- ======================================================================= -->
<h2 id="intro">Introduction</h2>
<!-- ======================================================================= -->
<p>This document contains the release notes for the Clang C/C++/Objective-C
frontend, part of the LLVM Compiler Infrastructure, release 3.2. Here we
describe the status of Clang in some detail, including major improvements
from the previous release and new feature work. For the general LLVM release
notes, see <a href="http://llvm.org/docs/ReleaseNotes.html">the LLVM
documentation</a>. All LLVM releases may be downloaded from the
<a href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
<p>For more information about Clang or LLVM, including information about the
latest release, please see the
<a href="http://clang.llvm.org">Clang Web Site</a> or the
<a href="http://llvm.org">LLVM Web Site</a>.
<p>Note that if you are reading this file from a Subversion checkout or the main
Clang web page, this document applies to the <i>next</i> release, not the
current one. To see the release notes for a specific release, please see the
<a href="http://llvm.org/releases/">releases page</a>.</p>
<!-- ======================================================================= -->
<h2 id="whatsnew">What's New in Clang 3.2?</h2>
<!-- ======================================================================= -->
<p>Some of the major new features and improvements to Clang are listed here.
Generic improvements to Clang as a whole or to its underlying infrastructure
are described first, followed by language-specific sections with improvements
to Clang's support for those languages.</p>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h3 id="majorfeatures">Major New Features</h3>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h4 id="diagnostics">Improvements to Clang's diagnostics</h4>
<p>Clang's diagnostics are constantly being improved to catch more issues,
explain them more clearly, and provide more accurate source information about
them. The improvements since the 3.1 release include:</p>
<ul>
<li><tt>-Wuninitialized</tt> has been taught to recognize uninitialized uses
which always occur when an explicitly-written non-constant condition is either
<tt>true</tt> or <tt>false</tt>. For example:
<pre>
int f(bool b) {
int n;
if (b)
n = 1;
return n;
}
<b>sometimes-uninit.cpp:3:7: <span class="warning">warning:</span> variable 'n' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]</b>
if (b)
<span class="caret">^</span>
<b>sometimes-uninit.cpp:5:10: <span class="note">note:</span></b> uninitialized use occurs here
return n;
<span class="caret">^</span>
<b>sometimes-uninit.cpp:3:3: <span class="note">note:</span></b> remove the 'if' if its condition is always true
if (b)
<span class="caret">^~~~~~</span>
<b>sometimes-uninit.cpp:2:8: <span class="note">note:</span></b> initialize the variable 'n' to silence this warning
int n;
<span class="caret">^</span>
<span class="caret"> = 0</span>
</pre>
This functionality can be enabled or disabled separately from
<tt>-Wuninitialized</tt> with the <tt>-Wsometimes-uninitialized</tt> warning
flag.</li>
<li>Template type diffing improves the display of diagnostics with templated
types in them.
<pre>
int f(vector&lt;map&lt;int, double&gt;&gt;);
int x = f(vector&lt;map&lt;int, float&gt;&gt;());
</pre>
The error message is the same, but the note is different based on the options selected.
<pre>
<b>template-diff.cpp:5:9: <span class="error">error:</span> no matching function for call to 'f'</b>
int x = f(vector&lt;map&lt;int, float&gt;&gt;());
<span class="caret">^</span>
</pre>
Templated type diffing with type elision (default):
<pre>
<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion from 'vector&lt;map&lt;[...], <span class="template-highlight">float</span>&gt;&gt;' to 'vector&lt;map&lt;[...], <span class="template-highlight">double</span>&gt;&gt;' for 1st argument;
int f(vector&lt;map&lt;int, double&gt;&gt;);
<span class="caret">^</span>
</pre>
Templated type diffing without type elision (-fno-elide-type):
<pre>
<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion from 'vector&lt;map&lt;int, <span class="template-highlight">float</span>&gt;&gt;' to 'vector&lt;map&lt;int, <span class="template-highlight">double</span>&gt;&gt;' for 1st argument;
int f(vector&lt;map&lt;int, double&gt;&gt;);
<span class="caret">^</span>
</pre>
Templated tree printing with type elision (-fdiagnostics-show-template-tree):
<pre>
<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion for 1st argument;
vector&lt;
map&lt;
[...],
[<span class="template-highlight">float</span> != <span class="template-highlight">double</span>]&gt;&gt;
int f(vector&lt;map&lt;int, double&gt;&gt;);
<span class="caret">^</span>
</pre>
Templated tree printing without type elision (-fdiagnostics-show-template-tree -fno-elide-type):
<pre>
<b>template-diff.cpp:4:5: <span class="note">note:</span></b> candidate function not viable: no known conversion for 1st argument;
vector&lt;
map&lt;
int,
[<span class="template-highlight">float</span> != <span class="template-highlight">double</span>]&gt;&gt;
int f(vector&lt;map&lt;int, double&gt;&gt;);
<span class="caret">^</span>
</pre>
</li>
<li>The Address Sanitizer feature and Clang's <tt>-fcatch-undefined-behavior</tt> option have been moved to a unified flag set:
<tt>-fsanitize</tt>. This flag can be used to enable the different dynamic checking tools when building. For example,
<tt>-faddress-sanitizer</tt> is now <tt>-fsanitize=address</tt>, and <tt>-fcatch-undefined-behavior</tt> is now
<tt>-fsanitize=undefined</tt>. With this release the set of checks available continues to grow, see the Clang
documentation and specific sanitizer notes below for details.
</li>
</ul>
<h4 id="tlsmodel">Support for <code>tls_model</code> attribute</h4>
<p>Clang now supports the <code>tls_model</code> attribute, allowing code that
uses thread-local storage to explicitly select which model to use. The available
models are <code>"global-dynamic"</code>, <code>"local-dynamic"</code>,
<code>"initial-exec"</code> and <code>"local-exec"</code>. See
<a href="http://www.akkadia.org/drepper/tls.pdf">ELF Handling For Thread-Local
Storage</a> for more information.</p>
<p>The compiler is free to choose a different model if the specified model is not
supported by the target, or if the compiler determines that a more specific
model can be used.
</p>
<h4>Type safety attributes</h4>
<p>Clang now supports type safety attributes that allow checking during compile
time that 'void *' function arguments and arguments for variadic functions are
of a particular type which is determined by some other argument to the same
function call.</p>
<p>Usecases include:</p>
<ul>
<li>MPI library implementations, where these attributes enable checking that
buffer type matches the passed <code>MPI_Datatype</code>;</li>
<li> HDF5 library -- similar usecase as for MPI;</li>
<li> checking types of variadic functions' arguments for functions like
<code>fcntl()</code> and <code>ioctl()</code>.</li>
</ul>
<p>See entries for <code>argument_with_type_tag</code>,
<code>pointer_with_type_tag</code> and <code>type_tag_for_datatype</code>
attributes in Clang language extensions documentation.</p>
<h4>Documentation comment support</h4>
<p>Clang now supports documentation comments written in a Doxygen-like syntax.
Clang parses the comments and can detect syntactic and semantic errors in
comments. These warnings are off by default. Pass <tt>-Wdocumentation</tt>
flag to enable warnings about documentation comments.</p>
<p>For example, given:</p>
<pre>/// \param [in] Str the string.
/// \returns a modified string.
void do_something(const std::string &amp;str);</pre>
<p><tt>clang -Wdocumentation</tt> will emit two warnings:</p>
<pre><b>doc-test.cc:3:6: <span class="warning">warning:</span></b> '\returns' command used in a comment that is attached to a function returning void [-Wdocumentation]
/// \returns a modified string.
<span class="caret">~^~~~~~~~~~~~~~~~~~~~~~~~~~</span>
<b>doc-test.cc:2:17: <span class="warning">warning:</span></b> parameter 'Str' not found in the function declaration [-Wdocumentation]
/// \param [in] Str the string.
<span class="caret">^~~</span>
<b>doc-test.cc:2:17: <span class="note">note:</span></b> did you mean 'str'?
/// \param [in] Str the string.
<span class="caret">^~~</span>
<span class="caret">str</span></pre>
<p>libclang includes a new API, <tt>clang_FullComment_getAsXML</tt>, to convert
comments to XML documents. This API can be used to build documentation
extraction tools.</p>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h3 id="newflags">New Compiler Flags</h3>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<ul>
<li><tt>-gline-tables-only</tt> controls the
<a href="http://clang.llvm.org/docs/UsersManual.html#debuginfosize">size of debug information</a>.
This flag tells Clang to emit debug info which is just enough to obtain stack traces with
function names, file names and line numbers (by such tools as gdb or addr2line).
Debug info for variables or function parameters is not produced, which reduces
the size of the resulting binary.
<li><tt>-ftls-model</tt> controls which TLS model to use for thread-local
variables. This can be overridden per variable using the
<a href="#tlsmodel"><tt>tls_model</tt> attribute</a> mentioned above.
For more details, see the <a href="UsersManual.html#opt_ftls-model">User's
Manual</a>.</li>
</ul>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h3 id="cchanges">C Language Changes in Clang</h3>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h4 id="c11changes">C11 Feature Support</h4>
<p>Clang 3.2 adds support for the C11 <code>_Alignof</code> keyword, pedantic warning through option
<code>-Wempty-translation-unit</code> (C11 6.9p1) </p>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h3 id="cxxchanges">C++ Language Changes in Clang</h3>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h4 id="cxx11changes">C++11 Feature Support</h4>
<p>Clang 3.2 supports <a href="http://clang.llvm.org/cxx_status.html#cxx11">most of the language features</a>
added in the latest ISO C++ standard,<a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=50372">C++ 2011</a>.
Use <code>-std=c++11</code> or <code>-std=gnu++11</code> to enable support for these features. In addition to the features supported by Clang 3.1, the
following features have been added:</p>
<ul>
<li>Implemented the C++11 discarded value expression rules for volatile lvalues.</li>
<li>Support for the C++11 enum forward declarations.</li>
<li>Handling of C++11 attribute namespaces (automatically).</li>
<li>Implemented C++11 [conv.prom]p4: an enumeration with a fixed underlying type has integral promotions
to both its underlying type and to its underlying type's promoted type.</li>
</ul>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h3 id="objcchanges">Objective-C Language Changes in Clang</h3>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<p>Bug-fixes, no functionality changes.</p>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
<h3 id="pythonchanges">Python Binding Changes</h3>
<!-- = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = -->
The following classes and methods have been added:
<ul>
<li>class CompilationDatabaseError(Exception)</li>
<li>class CompileCommand(object)</li>
<li>class CompileCommands(object)</li>
<li>class CompilationDatabase(ClangObject)</li>
<li>Cursor.is_static_method</li>
<li>Cursor.is_static_method</li>
<li>SourceLocation.from_offset</li>
<li>Cursor.is_static_method</li>
</ul>
<!-- ======================================================================= -->
<h2 id="additionalinfo">Additional Information</h2>
<!-- ======================================================================= -->
<p>A wide variety of additional information is available on the
<a href="http://clang.llvm.org/">Clang web page</a>. The web page contains
versions of the API documentation which are up-to-date with the Subversion
version of the source code. You can access versions of these documents
specific to this release by going into the "<tt>clang/doc/</tt>" directory in
the Clang tree.</p>
<p>If you have any questions or comments about Clang, please feel free to
contact us via
the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev"> mailing
list</a>.</p>
</div>
</body>
</html>

147
docs/ReleaseNotes.rst Normal file
View File

@ -0,0 +1,147 @@
=====================================
Clang 3.3 (In-Progress) Release Notes
=====================================
.. contents::
:local:
:depth: 2
Written by the `LLVM Team <http://llvm.org/>`_
.. warning::
These are in-progress notes for the upcoming Clang 3.3 release. You may
prefer the `Clang 3.2 Release Notes
<http://llvm.org/releases/3.2/docs/ClangReleaseNotes.html>`_.
Introduction
============
This document contains the release notes for the Clang C/C++/Objective-C
frontend, part of the LLVM Compiler Infrastructure, release 3.3. Here we
describe the status of Clang in some detail, including major
improvements from the previous release and new feature work. For the
general LLVM release notes, see `the LLVM
documentation <http://llvm.org/docs/ReleaseNotes.html>`_. All LLVM
releases may be downloaded from the `LLVM releases web
site <http://llvm.org/releases/>`_.
For more information about Clang or LLVM, including information about
the latest release, please see the `Clang Web
Site <http://clang.llvm.org>`_ or the `LLVM Web
Site <http://llvm.org>`_.
Note that if you are reading this file from a Subversion checkout or the
main Clang web page, this document applies to the *next* release, not
the current one. To see the release notes for a specific release, please
see the `releases page <http://llvm.org/releases/>`_.
What's New in Clang 3.3?
========================
Some of the major new features and improvements to Clang are listed
here. Generic improvements to Clang as a whole or to its underlying
infrastructure are described first, followed by language-specific
sections with improvements to Clang's support for those languages.
Major New Features
------------------
Improvements to Clang's diagnostics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Clang's diagnostics are constantly being improved to catch more issues,
explain them more clearly, and provide more accurate source information
about them. The improvements since the 3.2 release include:
- ...
Extended Identifiers: Unicode Support and Universal Character Names
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Clang 3.3 includes support for *extended identifiers* in C99 and C++.
This feature allows identifiers to contain certain Unicode characters, as
specified by the active language standard; these characters can be written
directly in the source file using the UTF-8 encoding, or referred to using
*universal character names* (``\u00E0``, ``\U000000E0``).
New Compiler Flags
------------------
- ...
C Language Changes in Clang
---------------------------
C11 Feature Support
^^^^^^^^^^^^^^^^^^^
...
C++ Language Changes in Clang
-----------------------------
C++11 Feature Support
^^^^^^^^^^^^^^^^^^^^^
...
Objective-C Language Changes in Clang
-------------------------------------
...
Internal API Changes
--------------------
These are major API changes that have happened since the 3.2 release of
Clang. If upgrading an external codebase that uses Clang as a library,
this section should help get you past the largest hurdles of upgrading.
Value Casting
^^^^^^^^^^^^^
Certain type hierarchies (TypeLoc, CFGElement, ProgramPoint, and SVal) were
misusing the llvm::cast machinery to perform undefined operations. Their APIs
have been changed to use two member function templates that return values
instead of pointers or references - "T castAs" and "Optional<T> getAs" (in the
case of the TypeLoc hierarchy the latter is "T getAs" and you can use the
boolean testability of a TypeLoc (or its 'validity') to verify that the cast
succeeded). Essentially all previous 'cast' usage should be replaced with
'castAs' and 'dyn_cast' should be replaced with 'getAs'. See r175462 for the
first example of such a change along with many examples of how code was
migrated to the new API.
Storage Class
^^^^^^^^^^^^^
For each variable and function Clang used to keep the storage class as written
in the source, the linkage and a semantic storage class. This was a bit
redundant and the semantic storage class has been removed. The method
getStorageClass now returns what is written in the source code for that decl.
...
Python Binding Changes
----------------------
The following methods have been added:
- ...
Significant Known Problems
==========================
Additional Information
======================
A wide variety of additional information is available on the `Clang web
page <http://clang.llvm.org/>`_. The web page contains versions of the
API documentation which are up-to-date with the Subversion version of
the source code. You can access versions of these documents specific to
this release by going into the "``clang/docs/``" directory in the Clang
tree.
If you have any questions or comments about Clang, please feel free to
contact us via the `mailing
list <http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_.

View File

@ -1,126 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<!-- Material used from: HTML 4.01 specs: http://www.w3.org/TR/html401/ -->
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>ThreadSanitizer, a race detector</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
<style type="text/css">
td {
vertical-align: top;
}
</style>
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>ThreadSanitizer</h1>
<ul>
<li> <a href="#intro">Introduction</a>
<li> <a href="#howtobuild">How to Build</a>
<li> <a href="#platforms">Supported Platforms</a>
<li> <a href="#usage">Usage</a>
<li> <a href="#limitations">Limitations</a>
<li> <a href="#status">Current Status</a>
<li> <a href="#moreinfo">More Information</a>
</ul>
<h2 id="intro">Introduction</h2>
ThreadSanitizer is a tool that detects data races. <BR>
It consists of a compiler instrumentation module and a run-time library. <BR>
Typical slowdown introduced by ThreadSanitizer is <b>5x-15x</b> (TODO: these numbers are
approximate so far).
<h2 id="howtobuild">How to build</h2>
Follow the <a href="../get_started.html">clang build instructions</a>.
CMake build is supported.<BR>
<h2 id="platforms">Supported Platforms</h2>
ThreadSanitizer is supported on Linux x86_64 (tested on Ubuntu 10.04). <BR>
Support for MacOS 10.7 (64-bit only) is planned for late 2012. <BR>
Support for 32-bit platforms is problematic and not yet planned.
<h2 id="usage">Usage</h2>
Simply compile your program with <tt>-fsanitize=thread -fPIE</tt> and link it
with <tt>-fsanitize=thread -pie</tt>.<BR>
To get a reasonable performance add <tt>-O1</tt> or higher. <BR>
Use <tt>-g</tt> to get file names and line numbers in the warning messages. <BR>
Example:
<pre>
% cat projects/compiler-rt/lib/tsan/output_tests/tiny_race.c
#include <pthread.h>
int Global;
void *Thread1(void *x) {
Global = 42;
return x;
}
int main() {
pthread_t t;
pthread_create(&t, NULL, Thread1, NULL);
Global = 43;
pthread_join(t, NULL);
return Global;
}
</pre>
<pre>
% clang -fsanitize=thread -g -O1 tiny_race.c -fPIE -pie
</pre>
If a bug is detected, the program will print an error message to stderr.
Currently, ThreadSanitizer symbolizes its output using an external
<tt>addr2line</tt>
process (this will be fixed in future).
<pre>
% TSAN_OPTIONS=strip_path_prefix=`pwd`/ # Don't print full paths.
% ./a.out 2> log
% cat log
WARNING: ThreadSanitizer: data race (pid=19219)
Write of size 4 at 0x7fcf47b21bc0 by thread 1:
#0 Thread1 tiny_race.c:4 (exe+0x00000000a360)
Previous write of size 4 at 0x7fcf47b21bc0 by main thread:
#0 main tiny_race.c:10 (exe+0x00000000a3b4)
Thread 1 (running) created at:
#0 pthread_create ??:0 (exe+0x00000000c790)
#1 main tiny_race.c:9 (exe+0x00000000a3a4)
</pre>
<h2 id="limitations">Limitations</h2>
<ul>
<li> ThreadSanitizer uses more real memory than a native run.
At the default settings the memory overhead is 9x plus 9Mb per each thread.
Settings with 5x and 3x overhead (but less accurate analysis) are also available.
<li> ThreadSanitizer maps (but does not reserve) a lot of virtual address space.
This means that tools like <tt>ulimit</tt> may not work as usually expected.
<li> Static linking is not supported.
<li> ThreadSanitizer requires <tt>-fPIE -pie</tt>
</ul>
<h2 id="status">Current Status</h2>
ThreadSanitizer is in alpha stage.
It is known to work on large C++ programs using pthreads, but we do not promise
anything (yet). <BR>
C++11 threading is not yet supported. <BR>
The test suite is integrated into CMake build and can be run with
<tt>make check-tsan</tt> command. <BR>
We are actively working on enhancing the tool -- stay tuned.
Any help, especially in the form of minimized standalone tests is more than welcome.
<h2 id="moreinfo">More Information</h2>
<a href="http://code.google.com/p/thread-sanitizer/">http://code.google.com/p/thread-sanitizer</a>.
</div>
</body>
</html>

126
docs/ThreadSanitizer.rst Normal file
View File

@ -0,0 +1,126 @@
ThreadSanitizer
===============
Introduction
------------
ThreadSanitizer is a tool that detects data races. It consists of a compiler
instrumentation module and a run-time library. Typical slowdown introduced by
ThreadSanitizer is about **5x-15x**. Typical memory overhead introduced by
ThreadSanitizer is about **5x-10x**.
How to build
------------
Follow the `Clang build instructions <../get_started.html>`_. CMake build is
supported.
Supported Platforms
-------------------
ThreadSanitizer is supported on Linux x86_64 (tested on Ubuntu 10.04 and 12.04).
Support for MacOS 10.7 (64-bit only) is planned for 2013. Support for 32-bit
platforms is problematic and not yet planned.
Usage
-----
Simply compile your program with ``-fsanitize=thread -fPIE`` and link it with
``-fsanitize=thread -pie``. To get a reasonable performance add ``-O1`` or
higher. Use ``-g`` to get file names and line numbers in the warning messages.
Example:
.. code-block:: c++
% cat projects/compiler-rt/lib/tsan/lit_tests/tiny_race.c
#include <pthread.h>
int Global;
void *Thread1(void *x) {
Global = 42;
return x;
}
int main() {
pthread_t t;
pthread_create(&t, NULL, Thread1, NULL);
Global = 43;
pthread_join(t, NULL);
return Global;
}
$ clang -fsanitize=thread -g -O1 tiny_race.c -fPIE -pie
If a bug is detected, the program will print an error message to stderr.
Currently, ThreadSanitizer symbolizes its output using an external
``addr2line`` process (this will be fixed in the future).
.. code-block:: bash
% ./a.out
WARNING: ThreadSanitizer: data race (pid=19219)
Write of size 4 at 0x7fcf47b21bc0 by thread T1:
#0 Thread1 tiny_race.c:4 (exe+0x00000000a360)
Previous write of size 4 at 0x7fcf47b21bc0 by main thread:
#0 main tiny_race.c:10 (exe+0x00000000a3b4)
Thread T1 (running) created at:
#0 pthread_create tsan_interceptors.cc:705 (exe+0x00000000c790)
#1 main tiny_race.c:9 (exe+0x00000000a3a4)
``__has_feature(thread_sanitizer)``
------------------------------------
In some cases one may need to execute different code depending on whether
ThreadSanitizer is enabled.
:ref:`\_\_has\_feature <langext-__has_feature-__has_extension>` can be used for
this purpose.
.. code-block:: c
#if defined(__has_feature)
# if __has_feature(thread_sanitizer)
// code that builds only under ThreadSanitizer
# endif
#endif
``__attribute__((no_sanitize_thread))``
-----------------------------------------------
Some code should not be instrumented by ThreadSanitizer.
One may use the function attribute
:ref:`no_sanitize_thread <langext-thread_sanitizer>`
to disable instrumentation of plain (non-atomic) loads/stores in a particular function.
ThreadSanitizer may still instrument such functions to avoid false positives.
This attribute may not be
supported by other compilers, so we suggest using it together with
``__has_feature(thread_sanitizer)``. Note: currently, this attribute will be
lost if the function is inlined.
Limitations
-----------
* ThreadSanitizer uses more real memory than a native run. At the default
settings the memory overhead is 5x plus 1MB per thread. Settings with 3x
(less accurate analysis) and 9x (more accurate analysis) overhead are also
available.
* ThreadSanitizer maps (but does not reserve) a lot of virtual address space.
This means that tools like ``ulimit`` may not work as usually expected.
* Libc/libstdc++ static linking is not supported.
* ThreadSanitizer requires ``-fPIE -pie`` compiler flags.
Current Status
--------------
ThreadSanitizer is in beta stage. It is known to work on large C++ programs
using pthreads, but we do not promise anything (yet). C++11 threading is
supported with llvm libc++. The test suite is integrated into CMake build
and can be run with ``make check-tsan`` command.
We are actively working on enhancing the tool --- stay tuned. Any help,
especially in the form of minimized standalone tests is more than welcome.
More Information
----------------
`http://code.google.com/p/thread-sanitizer <http://code.google.com/p/thread-sanitizer/>`_.

View File

@ -1,120 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Writing Clang Tools</title>
<link type="text/css" rel="stylesheet" href="../menu.css">
<link type="text/css" rel="stylesheet" href="../content.css">
</head>
<body>
<!--#include virtual="../menu.html.incl"-->
<div id="content">
<h1>Writing Clang Tools</h1>
<p>Clang provides infrastructure to write tools that need syntactic and semantic
information about a program. This document will give a short introduction of the
different ways to write clang tools, and their pros and cons.</p>
<!-- ======================================================================= -->
<h2 id="libclang"><a href="http://clang.llvm.org/doxygen/group__CINDEX.html">LibClang</a></h2>
<!-- ======================================================================= -->
<p>LibClang is a stable high level C interface to clang. When in doubt LibClang
is probably the interface you want to use. Consider the other interfaces only
when you have a good reason not to use LibClang.</p>
<p>Canonical examples of when to use LibClang:</p>
<ul>
<li>Xcode</li>
<li>Clang Python Bindings</li>
</ul>
<p>Use LibClang when you...</p>
<ul>
<li>want to interface with clang from other languages than C++</li>
<li>need a stable interface that takes care to be backwards compatible</li>
<li>want powerful high-level abstractions, like iterating through an AST
with a cursor, and don't want to learn all the nitty gritty details of Clang's
AST.</li>
</ul>
<p>Do not use LibClang when you...</p>
<ul>
<li>want full control over the Clang AST</li>
</ul>
<!-- ======================================================================= -->
<h2 id="clang-plugins"><a href="ClangPlugins.html">Clang Plugins</a></h2>
<!-- ======================================================================= -->
<p>Clang Plugins allow you to run additional actions on the AST as part of
a compilation. Plugins are dynamic libraries that are loaded at runtime by
the compiler, and they're easy to integrate into your build environment.</p>
<p>Canonical examples of when to use Clang Plugins:</p>
<ul>
<li>special lint-style warnings or errors for your project</li>
<li>creating additional build artifacts from a single compile step</li>
</ul>
<p>Use Clang Plugins when you...</p>
<ul>
<li>need your tool to rerun if any of the dependencies change</li>
<li>want your tool to make or break a build</li>
<li>need full control over the Clang AST</li>
</ul>
<p>Do not use Clang Plugins when you...</p>
<ul>
<li>want to run tools outside of your build environment</li>
<li>want full control on how Clang is set up, including mapping of in-memory
virtual files</li>
<li>need to run over a specific subset of files in your project which is not
necessarily related to any changes which would trigger rebuilds</li>
</ul>
<!-- ======================================================================= -->
<h2 id="libtooling"><a href="LibTooling.html">LibTooling</a></h2>
<!-- ======================================================================= -->
<p>LibTooling is a C++ interface aimed at writing standalone tools, as well as
integrating into services that run clang tools.</p>
<p>Canonical examples of when to use LibTooling:</p>
<ul>
<li>a simple syntax checker</li>
<li>refactoring tools</li>
</ul>
<p>Use LibTooling when you...</p>
<ul>
<li>want to run tools over a single file, or a specific subset of files,
independently of the build system</li>
<li>want full control over the Clang AST</li>
<li>want to share code with Clang Plugins</li>
</ul>
<p>Do not use LibTooling when you...</p>
<ul>
<li>want to run as part of the build triggered by dependency changes</li>
<li>want a stable interface so you don't need to change your code when the
AST API changes</li>
<li>want high level abstractions like cursors and code completion out of the
box</li>
<li>do not want to write your tools in C++</li>
</ul>
<!-- ======================================================================= -->
<h2 id="clang-tools"><a href="ClangTools.html">Clang Tools</a></h2>
<!-- ======================================================================= -->
<p>These are a collection of specific developer tools built on top of the
LibTooling infrastructure as part of the Clang project. They are targeted at
automating and improving core development activities of C/C++ developers.</p>
<p>Examples of tools we are building or planning as part of the Clang
project:</p>
<ul>
<li>Syntax checking (clang-check)</li>
<li>Automatic fixing of compile errors (clangc-fixit)</li>
<li>Automatic code formatting</li>
<li>Migration tools for new features in new language standards</li>
<li>Core refactoring tools</li>
</ul>
</div>
</body>
</html>

97
docs/Tooling.rst Normal file
View File

@ -0,0 +1,97 @@
=================================================
Choosing the Right Interface for Your Application
=================================================
Clang provides infrastructure to write tools that need syntactic and semantic
information about a program. This document will give a short introduction of
the different ways to write clang tools, and their pros and cons.
LibClang
--------
`LibClang <http://clang.llvm.org/doxygen/group__CINDEX.html>`_ is a stable high
level C interface to clang. When in doubt LibClang is probably the interface
you want to use. Consider the other interfaces only when you have a good
reason not to use LibClang.
Canonical examples of when to use LibClang:
* Xcode
* Clang Python Bindings
Use LibClang when you...:
* want to interface with clang from languages other than C++
* need a stable interface that takes care to be backwards compatible
* want powerful high-level abstractions, like iterating through an AST with a
cursor, and don't want to learn all the nitty gritty details of Clang's AST.
Do not use LibClang when you...:
* want full control over the Clang AST
Clang Plugins
-------------
:doc:`Clang Plugins <ClangPlugins>` allow you to run additional actions on the
AST as part of a compilation. Plugins are dynamic libraries that are loaded at
runtime by the compiler, and they're easy to integrate into your build
environment.
Canonical examples of when to use Clang Plugins:
* special lint-style warnings or errors for your project
* creating additional build artifacts from a single compile step
Use Clang Plugins when you...:
* need your tool to rerun if any of the dependencies change
* want your tool to make or break a build
* need full control over the Clang AST
Do not use Clang Plugins when you...:
* want to run tools outside of your build environment
* want full control over how Clang is set up, including mapping of in-memory
virtual files
* need to run over a specific subset of files in your project which is not
necessarily related to any changes which would trigger rebuilds
LibTooling
----------
:doc:`LibTooling <LibTooling>` is a C++ interface aimed at writing standalone
tools, as well as integrating into services that run clang tools. Canonical
examples of when to use LibTooling:
* a simple syntax checker
* refactoring tools
Use LibTooling when you...:
* want to run tools over a single file, or a specific subset of files,
independently of the build system
* want full control over the Clang AST
* want to share code with Clang Plugins
Do not use LibTooling when you...:
* want to run as part of the build triggered by dependency changes
* want a stable interface so you don't need to change your code when the AST API
changes
* want high level abstractions like cursors and code completion out of the box
* do not want to write your tools in C++
:doc:`Clang tools <ClangTools>` are a collection of specific developer tools
built on top of the LibTooling infrastructure as part of the Clang project.
They are targeted at automating and improving core development activities of
C/C++ developers.
Examples of tools we are building or planning as part of the Clang project:
* Syntax checking (:program:`clang-check`)
* Automatic fixing of compile errors (:program:`clang-fixit`)
* Automatic code formatting (:program:`clang-format`)
* Migration tools for new features in new language standards
* Core refactoring tools

File diff suppressed because it is too large Load Diff

1313
docs/UsersManual.rst Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,134 @@
============
Debug Checks
============
.. contents::
:local:
The analyzer contains a number of checkers which can aid in debugging. Enable
them by using the "-analyzer-checker=" flag, followed by the name of the
checker.
General Analysis Dumpers
========================
These checkers are used to dump the results of various infrastructural analyses
to stderr. Some checkers also have "view" variants, which will display a graph
using a 'dot' format viewer (such as Graphviz on OS X) instead.
- debug.DumpCallGraph, debug.ViewCallGraph: Show the call graph generated for
the current translation unit. This is used to determine the order in which to
analyze functions when inlining is enabled.
- debug.DumpCFG, debug.ViewCFG: Show the CFG generated for each top-level
function being analyzed.
- debug.DumpDominators: Shows the dominance tree for the CFG of each top-level
function.
- debug.DumpLiveVars: Show the results of live variable analysis for each
top-level function being analyzed.
Path Tracking
=============
These checkers print information about the path taken by the analyzer engine.
- debug.DumpCalls: Prints out every function or method call encountered during a
path traversal. This is indented to show the call stack, but does NOT do any
special handling of branches, meaning different paths could end up
interleaved.
- debug.DumpTraversal: Prints the name of each branch statement encountered
during a path traversal ("IfStmt", "WhileStmt", etc). Currently used to check
whether the analysis engine is doing BFS or DFS.
State Checking
==============
These checkers will print out information about the analyzer state in the form
of analysis warnings. They are intended for use with the -verify functionality
in regression tests.
- debug.TaintTest: Prints out the word "tainted" for every expression that
carries taint. At the time of this writing, taint was only introduced by the
checks under experimental.security.taint.TaintPropagation; this checker may
eventually move to the security.taint package.
- debug.ExprInspection: Responds to certain function calls, which are modeled
after builtins. These function calls should not affect the program state
other than through the evaluation of their arguments; to use them, you will
need to declare them within your test file. The available functions are
described below.
(FIXME: debug.ExprInspection should probably be renamed, since it no longer only
inspects expressions.)
ExprInspection checks
---------------------
- void clang_analyzer_eval(bool);
Prints TRUE if the argument is known to have a non-zero value, FALSE if the
argument is known to have a zero or null value, and UNKNOWN if the argument
isn't sufficiently constrained on this path. You can use this to test other
values by using expressions like "x == 5". Note that this functionality is
currently DISABLED in inlined functions, since different calls to the same
inlined function could provide different information, making it difficult to
write proper -verify directives.
In C, the argument can be typed as 'int' or as '_Bool'.
Example usage::
clang_analyzer_eval(x); // expected-warning{{UNKNOWN}}
if (!x) return;
clang_analyzer_eval(x); // expected-warning{{TRUE}}
- void clang_analyzer_checkInlined(bool);
If a call occurs within an inlined function, prints TRUE or FALSE according to
the value of its argument. If a call occurs outside an inlined function,
nothing is printed.
The intended use of this checker is to assert that a function is inlined at
least once (by passing 'true' and expecting a warning), or to assert that a
function is never inlined (by passing 'false' and expecting no warning). The
argument is technically unnecessary but is intended to clarify intent.
You might wonder why we can't print TRUE if a function is ever inlined and
FALSE if it is not. The problem is that any inlined function could conceivably
also be analyzed as a top-level function (in which case both TRUE and FALSE
would be printed), depending on the value of the -analyzer-inlining option.
In C, the argument can be typed as 'int' or as '_Bool'.
Example usage::
int inlined() {
clang_analyzer_checkInlined(true); // expected-warning{{TRUE}}
return 42;
}
void topLevel() {
clang_analyzer_checkInlined(false); // no-warning (not inlined)
int value = inlined();
// This assertion will not be valid if the previous call was not inlined.
clang_analyzer_eval(value == 42); // expected-warning{{TRUE}}
}
Statistics
==========
The debug.Stats checker collects various information about the analysis of each
function, such as how many blocks were reached and if the analyzer timed out.
There is also an additional -analyzer-stats flag, which enables various
statistics within the analyzer engine. Note the Stats checker (which produces at
least one bug report per function) may actually change the values reported by
-analyzer-stats.

View File

@ -2,36 +2,37 @@ Inlining
========
There are several options that control which calls the analyzer will consider for
inlining. The major one is -analyzer-ipa:
inlining. The major one is -analyzer-config ipa:
-analyzer-ipa=none - All inlining is disabled. This is the only mode available
in LLVM 3.1 and earlier and in Xcode 4.3 and earlier.
-analyzer-config ipa=none - All inlining is disabled. This is the only mode
available in LLVM 3.1 and earlier and in Xcode 4.3 and earlier.
-analyzer-ipa=basic-inlining - Turns on inlining for C functions, C++ static
member functions, and blocks -- essentially, the calls that behave like
simple C function calls. This is essentially the mode used in Xcode 4.4.
-analyzer-config ipa=basic-inlining - Turns on inlining for C functions, C++
static member functions, and blocks -- essentially, the calls that behave
like simple C function calls. This is essentially the mode used in
Xcode 4.4.
-analyzer-ipa=inlining - Turns on inlining when we can confidently find the
function/method body corresponding to the call. (C functions, static
-analyzer-config ipa=inlining - Turns on inlining when we can confidently find
the function/method body corresponding to the call. (C functions, static
functions, devirtualized C++ methods, Objective-C class methods, Objective-C
instance methods when ExprEngine is confident about the dynamic type of the
instance).
-analyzer-ipa=dynamic - Inline instance methods for which the type is
-analyzer-config ipa=dynamic - Inline instance methods for which the type is
determined at runtime and we are not 100% sure that our type info is
correct. For virtual calls, inline the most plausible definition.
-analyzer-ipa=dynamic-bifurcate - Same as -analyzer-ipa=dynamic, but the path
is split. We inline on one branch and do not inline on the other. This mode
does not drop the coverage in cases when the parent class has code that is
only exercised when some of its methods are overridden.
-analyzer-config ipa=dynamic-bifurcate - Same as -analyzer-config ipa=dynamic,
but the path is split. We inline on one branch and do not inline on the
other. This mode does not drop the coverage in cases when the parent class
has code that is only exercised when some of its methods are overridden.
Currently, -analyzer-ipa=dynamic-bifurcate is the default mode.
Currently, -analyzer-config ipa=dynamic-bifurcate is the default mode.
While -analyzer-ipa determines in general how aggressively the analyzer will try to
inline functions, several additional options control which types of functions can
inlined, in an all-or-nothing way. These options use the analyzer's configuration
table, so they are all specified as follows:
While -analyzer-config ipa determines in general how aggressively the analyzer
will try to inline functions, several additional options control which types of
functions can be inlined, in an all-or-nothing way. These options use the
analyzer's configuration table, so they are all specified as follows:
-analyzer-config OPTION=VALUE
@ -45,10 +46,14 @@ Each of these modes implies that all the previous member function kinds will be
inlined as well; it doesn't make sense to inline destructors without inlining
constructors, for example.
The default c++-inlining mode is 'methods', meaning only regular member
functions and overloaded operators will be inlined. Note that no C++ member
functions will be inlined under -analyzer-ipa=none or
-analyzer-ipa=basic-inlining.
The default c++-inlining mode is 'destructors', meaning that all member
functions with visible definitions will be considered for inlining. In some
cases the analyzer may still choose not to inline the function.
Note that under 'constructors', constructors for types with non-trivial
destructors will not be inlined. Additionally, no C++ member functions will be
inlined under -analyzer-config ipa=none or -analyzer-config ipa=basic-inlining,
regardless of the setting of the c++-inlining mode.
### c++-template-inlining ###
@ -71,7 +76,8 @@ considered for inlining.
-analyzer-config c++-template-inlining=[true | false]
Currently, C++ standard library functions are NOT considered for inlining by default.
Currently, C++ standard library functions are considered for inlining by
default.
The standard library functions and the STL in particular are used ubiquitously
enough that our tolerance for false positives is even lower here. A false
@ -79,6 +85,31 @@ positive due to poor modeling of the STL leads to a poor user experience, since
most users would not be comfortable adding assertions to system headers in order
to silence analyzer warnings.
### c++-container-inlining ###
This option controls whether constructors and destructors of "container" types
should be considered for inlining.
-analyzer-config c++-container-inlining=[true | false]
Currently, these constructors and destructors are NOT considered for inlining
by default.
The current implementation of this setting checks whether a type has a member
named 'iterator' or a member named 'begin'; these names are idiomatic in C++,
with the latter specified in the C++11 standard. The analyzer currently does a
fairly poor job of modeling certain data structure invariants of container-like
objects. For example, these three expressions should be equivalent:
std::distance(c.begin(), c.end()) == 0
c.begin() == c.end()
c.empty()
Many of these issues are avoided if containers always have unknown, symbolic
state, which is what happens when their constructors are treated as opaque.
In the future, we may decide specific containers are "safe" to model through
inlining, or choose to model them directly using checkers instead.
Basics of Implementation
-----------------------
@ -229,31 +260,31 @@ inlined.
== Inlining Dynamic Calls ==
The -analyzer-ipa option has five different modes: none, basic-inlining,
inlining, dynamic, and dynamic-bifurcate. Under -analyzer-ipa=dynamic, all
dynamic calls are inlined, whether we are certain or not that this will actually
be the definition used at runtime. Under -analyzer-ipa=inlining, only
"near-perfect" devirtualized calls are inlined*, and other dynamic calls are
evaluated conservatively (as if no definition were available).
The -analyzer-config ipa option has five different modes: none, basic-inlining,
inlining, dynamic, and dynamic-bifurcate. Under -analyzer-config ipa=dynamic,
all dynamic calls are inlined, whether we are certain or not that this will
actually be the definition used at runtime. Under -analyzer-config ipa=inlining,
only "near-perfect" devirtualized calls are inlined*, and other dynamic calls
are evaluated conservatively (as if no definition were available).
* Currently, no Objective-C messages are inlined under
-analyzer-ipa=inlining, even if we are reasonably confident of the type of the
receiver. We plan to enable this once we have tested our heuristics more
thoroughly.
-analyzer-config ipa=inlining, even if we are reasonably confident of the type
of the receiver. We plan to enable this once we have tested our heuristics
more thoroughly.
The last option, -analyzer-ipa=dynamic-bifurcate, behaves similarly to
The last option, -analyzer-config ipa=dynamic-bifurcate, behaves similarly to
"dynamic", but performs a conservative invalidation in the general virtual case
in *addition* to inlining. The details of this are discussed below.
As stated above, -analyzer-ipa=basic-inlining does not inline any C++ member
functions or Objective-C method calls, even if they are non-virtual or can be
safely devirtualized.
As stated above, -analyzer-config ipa=basic-inlining does not inline any C++
member functions or Objective-C method calls, even if they are non-virtual or
can be safely devirtualized.
Bifurcation
-----------
ExprEngine::BifurcateCall implements the -analyzer-ipa=dynamic-bifurcate
ExprEngine::BifurcateCall implements the -analyzer-config ipa=dynamic-bifurcate
mode.
When a call is made on an object with imprecise dynamic type information

155
docs/analyzer/Makefile Normal file
View File

@ -0,0 +1,155 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# Every target in this file names an action, not a file that gets created.
# Declare all of them phony (including `default`, `texinfo` and `info`) so that
# a stray file or directory with the same name can never suppress a rule.
.PHONY: default help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
default: html
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
-rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ClangStaticAnalyzer.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ClangStaticAnalyzer.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/ClangStaticAnalyzer"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ClangStaticAnalyzer"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
# Build the Texinfo sources, then run them through makeinfo to produce Info
# files. The sub-build goes through $(MAKE), as in the latexpdf target, so that
# command-line flags and the make program in use are passed on to the recursive
# invocation.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."

View File

@ -0,0 +1,171 @@
The analyzer "Store" represents the contents of memory regions. It is an opaque
functional data structure stored in each ProgramState; the only class that can
modify the store is its associated StoreManager.
Currently (Feb. 2013), the only StoreManager implementation being used is
RegionStoreManager. This store records bindings to memory regions using a "base
region + offset" key. (This allows `*p` and `p[0]` to map to the same location,
among other benefits.)
Regions are grouped into "clusters", which roughly correspond to "regions with
the same base region". This allows certain operations to be more efficient,
such as invalidation.
Regions that do not have a known offset use a special "symbolic" offset. These
keys store both the original region, and the "concrete offset region" -- the
last region whose offset is entirely concrete. (For example, in the expression
`foo.bar[1][i].baz`, the concrete offset region is the array `foo.bar[1]`,
since that has a known offset from the start of the top-level `foo` struct.)
Binding Invalidation
====================
Supporting both concrete and symbolic offsets makes things a bit tricky. Here's
an example:
foo[0] = 0;
foo[1] = 1;
foo[i] = i;
After the third assignment, nothing can be said about the value of `foo[0]`,
because `foo[i]` may have overwritten it! Thus, *binding to a region with a
symbolic offset invalidates the entire concrete offset region.* We know
`foo[i]` is somewhere within `foo`, so we don't have to invalidate anything
else, but we do have to be conservative about all other bindings within `foo`.
Continuing the example:
foo[i] = i;
foo[0] = 0;
After this latest assignment, nothing can be said about the value of `foo[i]`,
because `foo[0]` may have overwritten it! *Binding to a region R with a
concrete offset invalidates any symbolic offset bindings whose concrete offset
region is a super-region **or** sub-region of R.* All we know about `foo[i]` is
that it is somewhere within `foo`, so changing *anything* within `foo` might
change `foo[i]`, and changing *all* of `foo` (or its base region) will
*definitely* change `foo[i]`.
This logic could be improved by using the current constraints on `i`, at the
cost of speed. The latter case could also be improved by matching region kinds,
i.e. changing `foo[0].a` is unlikely to affect `foo[i].b`, no matter what `i`
is.
For more detail, read through RegionStoreManager::removeSubRegionBindings in
RegionStore.cpp.
ObjCIvarRegions
===============
Objective-C instance variables require a bit of special handling. Like struct
fields, they are not base regions, and when their parent object region is
invalidated, all the instance variables must be invalidated as well. However,
they have no concrete compile-time offsets (in the modern, "non-fragile"
runtime), and so cannot easily be represented as an offset from the start of
the object in the analyzer. Moreover, this means that invalidating a single
instance variable should *not* invalidate the rest of the object, since unlike
struct fields or array elements there is no way to perform pointer arithmetic
to access another instance variable.
Consequently, although the base region of an ObjCIvarRegion is the entire
object, RegionStore offsets are computed from the start of the instance
variable. Thus it is not valid to assume that all bindings with non-symbolic
offsets start from the base region!
Region Invalidation
===================
Unlike binding invalidation, region invalidation occurs when the entire
contents of a region may have changed---say, because it has been passed to a
function the analyzer can model, like memcpy, or because its address has
escaped, usually as an argument to an opaque function call. In these cases we
need to throw away not just all bindings within the region itself, but within
its entire cluster, since neighboring regions may be accessed via pointer
arithmetic.
Region invalidation typically does even more than this, however. Because it
usually represents the complete escape of a region from the analyzer's model,
its *contents* must also be transitively invalidated. (For example, if a region
'p' of type 'int **' is invalidated, the contents of '*p' and '**p' may have
changed as well.) The algorithm that traverses this transitive closure of
accessible regions is known as ClusterAnalysis, and is also used for finding
all live bindings in the store (in order to throw away the dead ones). The name
"ClusterAnalysis" predates the cluster-based organization of bindings, but
refers to the same concept: during invalidation and liveness analysis, all
bindings within a cluster must be treated in the same way for a conservative
model of program behavior.
Default Bindings
================
Most bindings in RegionStore are simple scalar values -- integers and pointers.
These are known as "Direct" bindings. However, RegionStore supports a second
type of binding called a "Default" binding. These are used to provide values to
all the elements of an aggregate type (struct or array) without having to
explicitly specify a binding for each individual element.
When there is no Direct binding for a particular region, the store manager
looks at each super-region in turn to see if there is a Default binding. If so,
this value is used as the value of the original region. The search ends when
the base region is reached, at which point the RegionStore will pick an
appropriate default value for the region (usually a symbolic value, but
sometimes zero, for static data, or "uninitialized", for stack variables).
int manyInts[10];
manyInts[1] = 42; // Creates a Direct binding for manyInts[1].
print(manyInts[1]); // Retrieves the Direct binding for manyInts[1];
print(manyInts[0]); // There is no Direct binding for manyInts[0].
// Is there a Default binding for the entire array?
// There is not, but it is a stack variable, so we use
// "uninitialized" as the default value (and emit a
// diagnostic!).
NOTE: The fact that bindings are stored as a base region plus an offset limits
the Default Binding strategy, because in C aggregates can contain other
aggregates. In the current implementation of RegionStore, there is no way to
distinguish a Default binding for an entire aggregate from a Default binding
for the sub-aggregate at offset 0.
Lazy Bindings (LazyCompoundVal)
===============================
RegionStore implements an optimization for copying aggregates (structs and
arrays) called "lazy bindings", implemented using a special SVal called
LazyCompoundVal. When the store is asked for the "binding" for an entire
aggregate (i.e. for an lvalue-to-rvalue conversion), it returns a
LazyCompoundVal instead. When this value is then stored into a variable, it is
bound as a Default value. This makes copying arrays and structs much cheaper
than if they had required memberwise access.
Under the hood, a LazyCompoundVal is implemented as a uniqued pair of (region,
store), representing "the value of the region during this 'snapshot' of the
store". This has important implications for any sort of liveness or
reachability analysis, which must take the bindings in the old store into
account.
Retrieving a value from a lazy binding happens in the same way as any other
Default binding: since there is no direct binding, the store manager falls back
to super-regions to look for an appropriate default binding. LazyCompoundVal
differs from a normal default binding, however, in that it contains several
different values, instead of one value that will appear several times. Because
of this, the store manager has to reconstruct the subregion chain on top of the
LazyCompoundVal region, and look up *that* region in the previous store.
Here's a concrete example:
CGPoint p;
p.x = 42; // A Direct binding is made to the FieldRegion 'p.x'.
CGPoint p2 = p; // A LazyCompoundVal is created for 'p', along with a
// snapshot of the current store state. This value is then
// used as a Default binding for the VarRegion 'p2'.
return p2.x; // The binding for FieldRegion 'p2.x' is requested.
// There is no Direct binding, so we look for a Default
// binding to 'p2' and find the LCV.
// Because it's an LCV, we look at our requested region
// and see that it's the '.x' field. We ask for the value
// of 'p.x' within the snapshot, and get back 42.

246
docs/analyzer/conf.py Normal file
View File

@ -0,0 +1,246 @@
# -*- coding: utf-8 -*-
#
# Clang Static Analyzer documentation build configuration file, created by
# sphinx-quickstart on Wed Jan 2 15:54:28 2013.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------

# Oldest Sphinx release able to build these docs; uncomment to enforce one.
#needs_sphinx = '1.0'

# Extension modules to load, given by dotted name (bundled 'sphinx.ext.*'
# modules or project-specific ones).
extensions = [
    "sphinx.ext.todo",
    "sphinx.ext.mathjax",
]

# Directories searched for templates, relative to this file's directory.
templates_path = ["_templates"]

# Filename suffix of the documentation sources.
source_suffix = ".rst"

# Encoding of the documentation sources.
#source_encoding = 'utf-8-sig'

# Document that holds the root toctree.
master_doc = "index"

# Project identification.
project = u"Clang Static Analyzer"
copyright = u"2013, Analyzer Team"

# Version strings; they stand in for |version| and |release| and also show up
# elsewhere in the built documents. `version` is the short X.Y form and
# `release` is the full one, including any alpha/beta/rc tag.
version = release = "3.3"

# Language of the content Sphinx generates on its own; see the Sphinx
# documentation for the supported values.
#language = None

# |today| is replaced by `today` when that is set to a non-false value;
# otherwise it is produced by a strftime call using `today_fmt`.
#today = ''
#today_fmt = '%B %d, %Y'

# Patterns (relative to the source directory) naming the files and directories
# that are skipped when collecting source files.
exclude_patterns = ["_build"]

# Default reST role, i.e. the one applied to `text` markup.
#default_role = None

# Whether '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# Whether the current module name is prepended to description unit titles
# (such as .. function::).
#add_module_names = True

# Whether sectionauthor and moduleauthor directives appear in the output;
# they are ignored by default.
#show_authors = False

# Pygments style used for syntax highlighting.
pygments_style = "sphinx"

# Prefixes ignored when sorting the module index.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------

# Theme for the HTML and HTML Help builders; the Sphinx documentation lists
# the built-in choices.
html_theme = "haiku"

# Per-theme tweaks to the look and feel; the available keys depend on the
# theme and are listed in the Sphinx documentation.
#html_theme_options = {}

# Directories holding custom themes, relative to this directory.
#html_theme_path = []

# Title of this documentation set. None means
# "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar; defaults to html_title.
#html_short_title = None

# Image file (relative to this directory) shown at the top of the sidebar.
#html_logo = None

# Favicon (looked up in the static path). Expected to be a Windows .ico file
# of 16x16 or 32x32 pixels.
#html_favicon = None

# Directories with custom static files such as style sheets, relative to this
# directory. They are copied after the built-in static files, so a file named
# "default.css" replaces the built-in "default.css".
html_static_path = []

# strftime format for the 'Last updated on:' line at the bottom of every page;
# the line is only inserted when this is not ''.
#html_last_updated_fmt = '%b %d, %Y'

# Whether SmartyPants converts quotes and dashes to typographically correct
# entities.
#html_use_smartypants = True

# Custom sidebar templates: document name -> template names.
#html_sidebars = {}

# Extra templates rendered as pages: page name -> template name.
#html_additional_pages = {}

# Set to False to skip generating the module index.
#html_domain_indices = True

# Set to False to skip generating the general index.
#html_use_index = True

# Whether the index is split into one page per letter.
#html_split_index = False

# Whether pages link to their reST sources.
#html_show_sourcelink = True

# Whether the footer shows "Created using Sphinx" (default: True).
#html_show_sphinx = True

# Whether the footer shows "(C) Copyright ..." (default: True).
#html_show_copyright = True

# When set, an OpenSearch description file is written and every page carries a
# <link> tag pointing at it. The value must be the base URL from which the
# finished HTML is served.
#html_use_opensearch = ''

# Filename suffix for generated HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Base name of the output file produced by the HTML help builder.
htmlhelp_basename = "ClangStaticAnalyzerdoc"
# -- Options for LaTeX output --------------------------------------------------

# LaTeX builder settings. Every key is left at its default; the recognised
# ones are shown below for reference:
#   'papersize': 'letterpaper',   # paper size: 'letterpaper' or 'a4paper'
#   'pointsize': '10pt',          # font size: '10pt', '11pt' or '12pt'
#   'preamble': '',               # additional stuff for the LaTeX preamble
latex_elements = {}

# How the document tree is grouped into LaTeX files. One tuple per file.
latex_documents = [
    (
        "index",                                 # source start file
        "ClangStaticAnalyzer.tex",               # target name
        u"Clang Static Analyzer Documentation",  # title
        u"Analyzer Team",                        # author
        "manual",                                # documentclass (howto/manual)
    ),
]

# Image file (relative to this directory) placed at the top of the title page.
#latex_logo = None

# For "manual" documents: when true, top-level headings become parts rather
# than chapters.
#latex_use_parts = False

# Whether page references are shown after internal links.
#latex_show_pagerefs = False

# Whether URL addresses are shown after external links.
#latex_show_urls = False

# Documents appended as an appendix to all manuals.
#latex_appendices = []

# Set to False to skip generating the module index.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One tuple per manual page.
man_pages = [
    (
        "index",                                 # source start file
        "clangstaticanalyzer",                   # name
        u"Clang Static Analyzer Documentation",  # description
        [u"Analyzer Team"],                      # authors
        1,                                       # manual section
    ),
]

# Whether URL addresses are shown after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# How the document tree is grouped into Texinfo files. One tuple per file.
# NOTE(review): the description below looks like the sphinx-quickstart
# placeholder; consider replacing it with a real one-line summary.
texinfo_documents = [
    (
        "index",                                 # source start file
        "ClangStaticAnalyzer",                   # target name
        u"Clang Static Analyzer Documentation",  # title
        u"Analyzer Team",                        # author
        "ClangStaticAnalyzer",                   # dir menu entry
        "One line description of project.",      # description
        "Miscellaneous",                         # category
    ),
]

# Documents appended as an appendix to all manuals.
#texinfo_appendices = []

# Set to False to skip generating the module index.
#texinfo_domain_indices = True

# How URL addresses are displayed: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'


# Example intersphinx configuration pointing at the Python standard library
# documentation.
# NOTE(review): 'sphinx.ext.intersphinx' is not listed in `extensions` in this
# file, so this mapping presumably has no effect; confirm before relying on it.
intersphinx_mapping = {"http://docs.python.org/": None}

View File

@ -1,89 +0,0 @@
The analyzer contains a number of checkers which can aid in debugging. Enable them by using the "-analyzer-checker=" flag, followed by the name of the checker.
General Analysis Dumpers
========================
These checkers are used to dump the results of various infrastructural analyses to stderr. Some checkers also have "view" variants, which will display a graph using a 'dot' format viewer (such as Graphviz on OS X) instead.
- debug.DumpCallGraph, debug.ViewCallGraph: Show the call graph generated for the current translation unit. This is used to determine the order in which to analyze functions when inlining is enabled.
- debug.DumpCFG, debug.ViewCFG: Show the CFG generated for each top-level function being analyzed.
- debug.DumpDominators: Shows the dominance tree for the CFG of each top-level function.
- debug.DumpLiveVars: Show the results of live variable analysis for each top-level function being analyzed.
Path Tracking
=============
These checkers print information about the path taken by the analyzer engine.
- debug.DumpCalls: Prints out every function or method call encountered during a path traversal. This is indented to show the call stack, but does NOT do any special handling of branches, meaning different paths could end up interleaved.
- debug.DumpTraversal: Prints the name of each branch statement encountered during a path traversal ("IfStmt", "WhileStmt", etc). Currently used to check whether the analysis engine is doing BFS or DFS.
State Checking
==============
These checkers will print out information about the analyzer state in the form of analysis warnings. They are intended for use with the -verify functionality in regression tests.
- debug.TaintTest: Prints out the word "tainted" for every expression that carries taint. At the time of this writing, taint was only introduced by the checks under experimental.security.taint.TaintPropagation; this checker may eventually move to the security.taint package.
- debug.ExprInspection: Responds to certain function calls, which are modeled after builtins. These function calls should not affect the program state other than the evaluation of their arguments; to use them, you will need to declare them within your test file. The available functions are described below.
(FIXME: debug.ExprInspection should probably be renamed, since it no longer only inspects expressions.)
ExprInspection checks
---------------------
- void clang_analyzer_eval(bool);
Prints TRUE if the argument is known to have a non-zero value,
FALSE if the argument is known to have a zero or null value, and
UNKNOWN if the argument isn't sufficiently constrained on this path.
You can use this to test other values by using expressions like "x == 5".
Note that this functionality is currently DISABLED in inlined functions,
since different calls to the same inlined function could provide different
information, making it difficult to write proper -verify directives.
In C, the argument can be typed as 'int' or as '_Bool'.
Example usage:
clang_analyzer_eval(x); // expected-warning{{UNKNOWN}}
if (!x) return;
clang_analyzer_eval(x); // expected-warning{{TRUE}}
- void clang_analyzer_checkInlined(bool);
If a call occurs within an inlined function, prints TRUE or FALSE according to
the value of its argument. If a call occurs outside an inlined function,
nothing is printed.
The intended use of this checker is to assert that a function is inlined at
least once (by passing 'true' and expecting a warning), or to assert that a
function is never inlined (by passing 'false' and expecting no warning). The
argument is technically unnecessary but is intended to clarify intent.
You might wonder why we can't print TRUE if a function is ever inlined and
FALSE if it is not. The problem is that any inlined function could conceivably
also be analyzed as a top-level function (in which case both TRUE and FALSE
would be printed), depending on the value of the -analyzer-inlining option.
In C, the argument can be typed as 'int' or as '_Bool'.
Example usage:
int inlined() {
clang_analyzer_checkInlined(true); // expected-warning{{TRUE}}
return 42;
}
void topLevel() {
clang_analyzer_checkInlined(false); // no-warning (not inlined)
int value = inlined();
// This assertion will not be valid if the previous call was not inlined.
clang_analyzer_eval(value == 42); // expected-warning{{TRUE}}
}
Statistics
==========
The debug.Stats checker collects various information about the analysis of each function, such as how many blocks were reached and if the analyzer timed out.
There is also an additional -analyzer-stats flag, which enables various statistics within the analyzer engine. Note the Stats checker (which produces at least one bug report per function) may actually change the values reported by -analyzer-stats.

23
docs/analyzer/index.rst Normal file
View File

@ -0,0 +1,23 @@
.. Clang Static Analyzer documentation master file, created by
sphinx-quickstart on Wed Jan 2 15:54:28 2013.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to Clang Static Analyzer's documentation!
=================================================
Contents:
.. toctree::
:maxdepth: 2
DebugChecks
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

190
docs/analyzer/make.bat Normal file
View File

@ -0,0 +1,190 @@
@ECHO OFF
REM Command file for Sphinx documentation
REM
REM Setup section: establishes the variables every target further down uses.
REM Use the bare command name sphinx-build unless the caller has already set
REM SPHINXBUILD in the environment.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM Root directory for all generated output.
set BUILDDIR=_build
REM Options shared by most builders: the -d doctree directory under BUILDDIR,
REM any caller-supplied SPHINXOPTS, and the current directory as the source.
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
REM Options used by the gettext target: the same, but without the -d option.
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM When PAPER is set, prepend a latex_paper_size config override to both
REM option sets.
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
REM Help target: prints the list of supported targets. It is reached either
REM by passing no argument (the goto on the next line jumps to the help label
REM inside the block) or by passing the word help explicitly.
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
REM Clean target: empties the build directory. The for loop removes each
REM subdirectory of BUILDDIR recursively and without prompting; the del then
REM removes the remaining files quietly.
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
REM Builder targets. Each one follows the same shape: run SPHINXBUILD with the
REM matching -b builder and an output directory under BUILDDIR, leave the
REM script with exit code 1 if the build reported an error, otherwise print a
REM completion message and jump to the end label.
REM
REM html: standalone HTML pages.
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
REM dirhtml: HTML files named index.html in directories.
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
REM singlehtml: one large HTML file.
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
REM pickle: pickle files for further processing.
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
REM json: JSON files for further processing.
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
REM htmlhelp: HTML files plus an HTML help project. The caret at the end of
REM the echo line continues the message onto the following line, which must
REM therefore stay unindented.
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM qthelp: HTML files plus a Qt help project. After a successful build the
REM user is told how to compile the generated .qhcp project with
REM qcollectiongenerator and how to open the result in Qt Assistant.
REM qcollectiongenerator turns a .qhcp project into a .qhc collection file,
REM so the assistant hint must name the .qhc file.
REM The caret at the end of the first long echo continues the message onto the
REM following line, which must therefore stay unindented.
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\ClangStaticAnalyzer.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\ClangStaticAnalyzer.qhc
goto end
)
REM Remaining builder targets. Each one runs SPHINXBUILD with the matching -b
REM builder, leaves the script with exit code 1 if the build reported an
REM error, otherwise prints a completion message and jumps to the end label.
REM
REM devhelp: HTML files plus a Devhelp project.
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
REM epub: an epub file.
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
REM latex: LaTeX files; the PAPER variable handled in the setup section
REM applies here.
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
REM text: plain text files.
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
REM man: manual pages.
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
REM texinfo: Texinfo files.
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
REM gettext: PO message catalogs. This is the only target that passes
REM I18NSPHINXOPTS instead of ALLSPHINXOPTS, and it writes to the locale
REM directory rather than one named after the builder.
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
REM changes: overview of changed, added and deprecated items.
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
REM linkcheck: checks external links. The caret at the end of the echo line
REM continues the message onto the following line, which must stay unindented.
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
REM doctest: runs the doctests embedded in the documentation. The echo line
REM is continued with a caret in the same way as in the linkcheck target.
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
REM Common exit point for every target. An argument that matches none of the
REM targets above also falls through to here without doing anything.
:end

242
docs/conf.py Normal file
View File

@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
#
# Clang documentation build configuration file, created by
# sphinx-quickstart on Sun Dec 9 20:01:55 2012.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration -----------------------------------------------------

# Oldest Sphinx release able to build these docs; uncomment to enforce one.
#needs_sphinx = '1.0'

# Extension modules to load, given by dotted name (bundled 'sphinx.ext.*'
# modules or project-specific ones).
extensions = [
    "sphinx.ext.todo",
    "sphinx.ext.mathjax",
]

# Directories searched for templates, relative to this file's directory.
templates_path = ["_templates"]

# Filename suffix of the documentation sources.
source_suffix = ".rst"

# Encoding of the documentation sources.
#source_encoding = 'utf-8-sig'

# Document that holds the root toctree.
master_doc = "index"

# Project identification.
project = u"Clang"
copyright = u"2007-2013, The Clang Team"

# Version strings; they stand in for |version| and |release| and also show up
# elsewhere in the built documents. `version` is the short X.Y form and
# `release` is the full one, including any alpha/beta/rc tag.
version = release = "3.3"

# Language of the content Sphinx generates on its own; see the Sphinx
# documentation for the supported values.
#language = None

# |today| is replaced by `today` when that is set to a non-false value;
# otherwise it is produced by a strftime call using `today_fmt`.
#today = ''
#today_fmt = '%B %d, %Y'

# Patterns (relative to the source directory) naming the files and directories
# that are skipped when collecting source files.
exclude_patterns = [
    "_build",
    "analyzer",
]

# Default reST role, i.e. the one applied to `text` markup.
#default_role = None

# Whether '()' is appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# Whether the current module name is prepended to description unit titles
# (such as .. function::).
#add_module_names = True

# Whether sectionauthor and moduleauthor directives appear in the output;
# they are ignored by default.
#show_authors = False

# Pygments style used for syntax highlighting.
pygments_style = "friendly"

# Prefixes ignored when sorting the module index.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------

# Theme for the HTML and HTML Help builders; the Sphinx documentation lists
# the built-in choices.
html_theme = "haiku"

# Per-theme tweaks to the look and feel; the available keys depend on the
# theme and are listed in the Sphinx documentation.
#html_theme_options = {}

# Directories holding custom themes, relative to this directory.
#html_theme_path = []

# Title of this documentation set. None means
# "<project> v<release> documentation".
#html_title = None

# Shorter title for the navigation bar; defaults to html_title.
#html_short_title = None

# Image file (relative to this directory) shown at the top of the sidebar.
#html_logo = None

# Favicon (looked up in the static path). Expected to be a Windows .ico file
# of 16x16 or 32x32 pixels.
#html_favicon = None

# Directories with custom static files such as style sheets, relative to this
# directory. They are copied after the built-in static files, so a file named
# "default.css" replaces the built-in "default.css".
html_static_path = []

# strftime format for the 'Last updated on:' line at the bottom of every page;
# the line is only inserted when this is not ''.
#html_last_updated_fmt = '%b %d, %Y'

# Whether SmartyPants converts quotes and dashes to typographically correct
# entities.
#html_use_smartypants = True

# Custom sidebar templates: document name -> template names.
#html_sidebars = {}

# Extra templates rendered as pages: page name -> template name.
#html_additional_pages = {}

# Set to False to skip generating the module index.
#html_domain_indices = True

# Set to False to skip generating the general index.
#html_use_index = True

# Whether the index is split into one page per letter.
#html_split_index = False

# Whether pages link to their reST sources.
#html_show_sourcelink = True

# Whether the footer shows "Created using Sphinx" (default: True).
#html_show_sphinx = True

# Whether the footer shows "(C) Copyright ..." (default: True).
#html_show_copyright = True

# When set, an OpenSearch description file is written and every page carries a
# <link> tag pointing at it. The value must be the base URL from which the
# finished HTML is served.
#html_use_opensearch = ''

# Filename suffix for generated HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Base name of the output file produced by the HTML help builder.
htmlhelp_basename = "Clangdoc"
# -- Options for LaTeX output --------------------------------------------------

# LaTeX builder settings. Every key is left at its default; the recognised
# ones are shown below for reference:
#   'papersize': 'letterpaper',   # paper size: 'letterpaper' or 'a4paper'
#   'pointsize': '10pt',          # font size: '10pt', '11pt' or '12pt'
#   'preamble': '',               # additional stuff for the LaTeX preamble
latex_elements = {}

# How the document tree is grouped into LaTeX files. One tuple per file.
latex_documents = [
    (
        "index",                 # source start file
        "Clang.tex",             # target name
        u"Clang Documentation",  # title
        u"The Clang Team",       # author
        "manual",                # documentclass (howto/manual)
    ),
]

# Image file (relative to this directory) placed at the top of the title page.
#latex_logo = None

# For "manual" documents: when true, top-level headings become parts rather
# than chapters.
#latex_use_parts = False

# Whether page references are shown after internal links.
#latex_show_pagerefs = False

# Whether URL addresses are shown after external links.
#latex_show_urls = False

# Documents appended as an appendix to all manuals.
#latex_appendices = []

# Set to False to skip generating the module index.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One tuple per manual page.
man_pages = [
    (
        "index",                 # source start file
        "clang",                 # name
        u"Clang Documentation",  # description
        [u"The Clang Team"],     # authors
        1,                       # manual section
    ),
]

# Whether URL addresses are shown after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# How the document tree is grouped into Texinfo files. One tuple per file.
# NOTE(review): the description below looks like the sphinx-quickstart
# placeholder; consider replacing it with a real one-line summary.
texinfo_documents = [
    (
        "index",                             # source start file
        "Clang",                             # target name
        u"Clang Documentation",              # title
        u"The Clang Team",                   # author
        "Clang",                             # dir menu entry
        "One line description of project.",  # description
        "Miscellaneous",                     # category
    ),
]

# Documents appended as an appendix to all manuals.
#texinfo_appendices = []

# Set to False to skip generating the module index.
#texinfo_domain_indices = True

# How URL addresses are displayed: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

73
docs/index.rst Normal file
View File

@ -0,0 +1,73 @@
.. Clang documentation master file, created by
sphinx-quickstart on Sun Dec 9 20:01:55 2012.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
.. title:: Welcome to Clang's documentation!
.. toctree::
:maxdepth: 1
ReleaseNotes
Using Clang as a Compiler
=========================
.. toctree::
:maxdepth: 1
UsersManual
LanguageExtensions
AddressSanitizer
ThreadSanitizer
MemorySanitizer
Modules
FAQ
Using Clang as a Library
========================
.. toctree::
:maxdepth: 1
Tooling
ExternalClangExamples
IntroductionToTheClangAST
LibTooling
LibFormat
ClangPlugins
RAVFrontendAction
LibASTMatchersTutorial
LibASTMatchers
HowToSetupToolingForLLVM
JSONCompilationDatabase
Using Clang Tools
=================
.. toctree::
:maxdepth: 1
ClangTools
ClangCheck
ClangFormat
Design Documents
================
.. toctree::
:maxdepth: 1
InternalsManual
DriverInternals
PTHInternals
PCHInternals
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

190
docs/make.bat Normal file
View File

@ -0,0 +1,190 @@
@ECHO OFF
REM Command file for Sphinx documentation
REM Use sphinx-build from PATH unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM All builder output goes under this directory.
set BUILDDIR=_build
REM Common sphinx-build arguments: the doctrees directory under BUILDDIR, any
REM caller-supplied SPHINXOPTS, and the current directory as the source directory.
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
REM Arguments used by the gettext target, which is invoked without -d.
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM When PAPER is set, prepend it as a latex_paper_size config override.
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
REM Remove every subdirectory of BUILDDIR, then any remaining files under it.
REM The loop variable is quoted so that a directory name containing spaces is
REM passed to rmdir as a single argument.
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s "%%i"
del /q /s %BUILDDIR%\*
goto end
)
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM Build the Qt help project files, then print the follow-up commands.
REM qcollectiongenerator turns the .qhcp project into a .qhc collection file,
REM which is what "assistant -collectionFile" expects.
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Clang.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Clang.qhc
goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
:end

View File

@ -133,24 +133,56 @@ def act_on_decl(declaration, comment, allowed_types):
if declaration.strip():
# Node matchers are defined by writing:
# VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
m = re.match(r""".*VariadicDynCastAllOfMatcher\s*<
\s*([^\s,]+)\s*,
\s*([^\s>]+)\s*>
m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
\s*([^\s,]+)\s*(?:,
\s*([^\s>]+)\s*)?>
\s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
if m:
result, inner, name = m.groups()
if not inner:
inner = result
add_matcher(result, name, 'Matcher<%s>...' % inner,
comment, is_dyncast=True)
return
# Parse the various matcher definition macros.
m = re.match(r"""^\s*AST_(POLYMORPHIC_)?MATCHER(_P)?(.?)\(
m = re.match(""".*AST_TYPE_MATCHER\(
\s*([^\s,]+\s*),
\s*([^\s,]+\s*)
\)\s*;\s*$""", declaration, flags=re.X)
if m:
inner, name = m.groups()
add_matcher('Type', name, 'Matcher<%s>...' % inner,
comment, is_dyncast=True)
add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
comment, is_dyncast=True)
return
m = re.match(""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER\(
\s*([^\s,]+\s*),
\s*(?:[^\s,]+\s*)
\)\s*;\s*$""", declaration, flags=re.X)
if m:
loc = m.group(1)
name = m.group(2)
result_types = extract_result_types(comment)
if not result_types:
raise Exception('Did not find allowed result types for: %s' % name)
for result_type in result_types:
add_matcher(result_type, name, 'Matcher<Type>', comment)
if loc:
add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>',
comment)
return
m = re.match(r"""^\s*AST_(POLYMORPHIC_)?MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
(?:\s*([^\s,]+)\s*,)?
\s*([^\s,]+)\s*
(?:,\s*([^\s,]+)\s*
,\s*([^\s,]+)\s*)?
(?:,\s*([^\s,]+)\s*
,\s*([^\s,]+)\s*)?
(?:,\s*\d+\s*)?
\)\s*{\s*$""", declaration, flags=re.X)
if m:
p, n, result, name = m.groups()[1:5]
@ -178,9 +210,9 @@ def act_on_decl(declaration, comment, allowed_types):
if m:
result, name, args = m.groups()
args = ', '.join(p.strip() for p in args.split(','))
m = re.match(r'.*\s+internal::Matcher<([^>]+)>$', result)
m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
if m:
result_types = [m.group(1)]
result_types = [m.group(2)]
else:
result_types = extract_result_types(comment)
if not result_types:

View File

@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/AST.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/Frontend/CompilerInstance.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;

View File

@ -1,7 +1,7 @@
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/CheckerRegistry.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/CheckerRegistry.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
using namespace clang;
using namespace ento;

View File

@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
asmparser
bitreader
bitwriter
irreader
codegen
ipo
linker

View File

@ -8,25 +8,24 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/CodeGenAction.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Tool.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "llvm/Module.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace clang::driver;
@ -75,14 +74,13 @@ int main(int argc, const char **argv, char * const *envp) {
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
Driver TheDriver(Path.str(), llvm::sys::getDefaultTargetTriple(),
"a.out", /*IsProduction=*/false, Diags);
Driver TheDriver(Path.str(), llvm::sys::getProcessTriple(), "a.out", Diags);
TheDriver.setTitle("clang interpreter");
// FIXME: This is a hack to try to force the driver to do something we can
// recognize. We need to extend the driver library to support this use model
// (basically, exactly one input, and the operation mode is hard wired).
llvm::SmallVector<const char *, 16> Args(argv, argv + argc);
SmallVector<const char *, 16> Args(argv, argv + argc);
Args.push_back("-fsyntax-only");
OwningPtr<Compilation> C(TheDriver.BuildCompilation(Args));
if (!C)
@ -130,7 +128,7 @@ int main(int argc, const char **argv, char * const *envp) {
Clang.setInvocation(CI.take());
// Create the compiler's actual diagnostics engine.
Clang.createDiagnostics(int(CCArgs.size()),const_cast<char**>(CCArgs.data()));
Clang.createDiagnostics();
if (!Clang.hasDiagnostics())
return 1;

View File

@ -94,6 +94,12 @@ CINDEX_LINKAGE CXCompileCommands
clang_CompilationDatabase_getCompileCommands(CXCompilationDatabase,
const char *CompleteFileName);
/**
* \brief Get all the compile commands in the given compilation database.
*/
CINDEX_LINKAGE CXCompileCommands
clang_CompilationDatabase_getAllCompileCommands(CXCompilationDatabase);
/**
* \brief Free the given CompileCommands
*/

View File

@ -36,7 +36,7 @@ extern "C" {
* with the string data, call \c clang_disposeString() to free the string.
*/
typedef struct {
void *data;
const void *data;
unsigned private_flags;
} CXString;

View File

@ -32,7 +32,7 @@
* compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable.
*/
#define CINDEX_VERSION_MAJOR 0
#define CINDEX_VERSION_MINOR 6
#define CINDEX_VERSION_MINOR 15
#define CINDEX_VERSION_ENCODE(major, minor) ( \
((major) * 10000) \
@ -296,6 +296,24 @@ CINDEX_LINKAGE CXString clang_getFileName(CXFile SFile);
*/
CINDEX_LINKAGE time_t clang_getFileTime(CXFile SFile);
/**
* \brief Uniquely identifies a CXFile, that refers to the same underlying file,
* across an indexing session.
*/
typedef struct {
unsigned long long data[3];
} CXFileUniqueID;
/**
* \brief Retrieve the unique ID for the given \c file.
*
* \param file the file to get the ID for.
* \param outID stores the returned CXFileUniqueID.
* \returns If there was a failure getting the unique ID, returns non-zero,
* otherwise returns 0.
*/
CINDEX_LINKAGE int clang_getFileUniqueID(CXFile file, CXFileUniqueID *outID);
/**
* \brief Determine whether the given header is guarded against
* multiple inclusions, either with the conventional
@ -342,7 +360,7 @@ CINDEX_LINKAGE CXFile clang_getFile(CXTranslationUnit tu,
* to map a source location to a particular file, line, and column.
*/
typedef struct {
void *ptr_data[2];
const void *ptr_data[2];
unsigned int_data;
} CXSourceLocation;
@ -353,7 +371,7 @@ typedef struct {
* starting and end locations from a source range, respectively.
*/
typedef struct {
void *ptr_data[2];
const void *ptr_data[2];
unsigned begin_int_data;
unsigned end_int_data;
} CXSourceRange;
@ -361,7 +379,7 @@ typedef struct {
/**
* \brief Retrieve a NULL (invalid) source location.
*/
CINDEX_LINKAGE CXSourceLocation clang_getNullLocation();
CINDEX_LINKAGE CXSourceLocation clang_getNullLocation(void);
/**
* \brief Determine whether two source locations, which must refer into
@ -393,7 +411,7 @@ CINDEX_LINKAGE CXSourceLocation clang_getLocationForOffset(CXTranslationUnit tu,
/**
* \brief Retrieve a NULL (invalid) source range.
*/
CINDEX_LINKAGE CXSourceRange clang_getNullRange();
CINDEX_LINKAGE CXSourceRange clang_getNullRange(void);
/**
* \brief Retrieve a source range given the beginning and ending source
@ -530,6 +548,35 @@ CINDEX_LINKAGE void clang_getSpellingLocation(CXSourceLocation location,
unsigned *column,
unsigned *offset);
/**
* \brief Retrieve the file, line, column, and offset represented by
* the given source location.
*
* If the location refers into a macro expansion, return where the macro was
* expanded or where the macro argument was written, if the location points at
* a macro argument.
*
* \param location the location within a source file that will be decomposed
* into its parts.
*
* \param file [out] if non-NULL, will be set to the file to which the given
* source location points.
*
* \param line [out] if non-NULL, will be set to the line to which the given
* source location points.
*
* \param column [out] if non-NULL, will be set to the column to which the given
* source location points.
*
* \param offset [out] if non-NULL, will be set to the offset into the
* buffer to which the given source location points.
*/
CINDEX_LINKAGE void clang_getFileLocation(CXSourceLocation location,
CXFile *file,
unsigned *line,
unsigned *column,
unsigned *offset);
/**
* \brief Retrieve a source location representing the first character within a
* source range.
@ -2072,7 +2119,7 @@ enum CXCursorKind {
typedef struct {
enum CXCursorKind kind;
int xdata;
void *data[3];
const void *data[3];
} CXCursor;
/**
@ -2330,7 +2377,7 @@ typedef struct CXCursorSetImpl *CXCursorSet;
/**
* \brief Creates an empty CXCursorSet.
*/
CINDEX_LINKAGE CXCursorSet clang_createCXCursorSet();
CINDEX_LINKAGE CXCursorSet clang_createCXCursorSet(void);
/**
* \brief Disposes a CXCursorSet and releases its associated memory.
@ -2626,6 +2673,7 @@ enum CXCallingConv {
CXCallingConv_AAPCS = 6,
CXCallingConv_AAPCS_VFP = 7,
CXCallingConv_PnaclCall = 8,
CXCallingConv_IntelOclBicc = 9,
CXCallingConv_Invalid = 100,
CXCallingConv_Unexposed = 200
@ -2646,6 +2694,14 @@ typedef struct {
*/
CINDEX_LINKAGE CXType clang_getCursorType(CXCursor C);
/**
* \brief Pretty-print the underlying type using the rules of the
* language of the translation unit from which it came.
*
* If the type is invalid, an empty string is returned.
*/
CINDEX_LINKAGE CXString clang_getTypeSpelling(CXType CT);
/**
* \brief Retrieve the underlying type of a typedef declaration.
*
@ -2682,19 +2738,28 @@ CINDEX_LINKAGE long long clang_getEnumConstantDeclValue(CXCursor C);
*/
CINDEX_LINKAGE unsigned long long clang_getEnumConstantDeclUnsignedValue(CXCursor C);
/**
* \brief Retrieve the bit width of a bit field declaration as an integer.
*
* If a cursor that is not a bit field declaration is passed in, -1 is returned.
*/
CINDEX_LINKAGE int clang_getFieldDeclBitWidth(CXCursor C);
/**
* \brief Retrieve the number of non-variadic arguments associated with a given
* cursor.
*
* If a cursor that is not a function or method is passed in, -1 is returned.
* The number of arguments can be determined for calls as well as for
* declarations of functions or methods. For other cursors -1 is returned.
*/
CINDEX_LINKAGE int clang_Cursor_getNumArguments(CXCursor C);
/**
* \brief Retrieve the argument cursor of a function or method.
*
* If a cursor that is not a function or method is passed in or the index
* exceeds the number of arguments, an invalid cursor is returned.
* The argument cursor can be determined for calls as well as for declarations
* of functions or methods. For other cursors and for invalid indices, an
* invalid cursor is returned.
*/
CINDEX_LINKAGE CXCursor clang_Cursor_getArgument(CXCursor C, unsigned i);
@ -3284,7 +3349,8 @@ CINDEX_LINKAGE CXString clang_Module_getFullName(CXModule Module);
*
* \returns the number of top level headers associated with this module.
*/
CINDEX_LINKAGE unsigned clang_Module_getNumTopLevelHeaders(CXModule Module);
CINDEX_LINKAGE unsigned clang_Module_getNumTopLevelHeaders(CXTranslationUnit,
CXModule Module);
/**
* \param Module a module object.
@ -3294,7 +3360,8 @@ CINDEX_LINKAGE unsigned clang_Module_getNumTopLevelHeaders(CXModule Module);
* \returns the specified top level header associated with the module.
*/
CINDEX_LINKAGE
CXFile clang_Module_getTopLevelHeader(CXModule Module, unsigned Index);
CXFile clang_Module_getTopLevelHeader(CXTranslationUnit,
CXModule Module, unsigned Index);
/**
* @}
@ -4828,7 +4895,7 @@ CXString clang_codeCompleteGetObjCSelector(CXCodeCompleteResults *Results);
* \brief Return a version string, suitable for showing to a user, but not
* intended to be parsed (the format is not guaranteed to be stable).
*/
CINDEX_LINKAGE CXString clang_getClangVersion();
CINDEX_LINKAGE CXString clang_getClangVersion(void);
/**
@ -4943,6 +5010,23 @@ typedef struct {
enum CXVisitorResult (*visit)(void *context, CXCursor, CXSourceRange);
} CXCursorAndRangeVisitor;
typedef enum {
/**
* \brief Function returned successfully.
*/
CXResult_Success = 0,
/**
* \brief One of the parameters was invalid for the function.
*/
CXResult_Invalid = 1,
/**
* \brief The function was terminated by a callback (e.g. it returned
* CXVisit_Break)
*/
CXResult_VisitBreak = 2
} CXResult;
/**
* \brief Find references of a declaration in a specific file.
*
@ -4954,10 +5038,28 @@ typedef struct {
* each reference found.
* The CXSourceRange will point inside the file; if the reference is inside
* a macro (and not a macro argument) the CXSourceRange will be invalid.
*
* \returns one of the CXResult enumerators.
*/
CINDEX_LINKAGE void clang_findReferencesInFile(CXCursor cursor, CXFile file,
CINDEX_LINKAGE CXResult clang_findReferencesInFile(CXCursor cursor, CXFile file,
CXCursorAndRangeVisitor visitor);
/**
* \brief Find #import/#include directives in a specific file.
*
* \param TU translation unit containing the file to query.
*
* \param file to search for #import/#include directives.
*
* \param visitor callback that will receive pairs of CXCursor/CXSourceRange for
* each directive found.
*
* \returns one of the CXResult enumerators.
*/
CINDEX_LINKAGE CXResult clang_findIncludesInFile(CXTranslationUnit TU,
CXFile file,
CXCursorAndRangeVisitor visitor);
#ifdef __has_feature
# if __has_feature(blocks)
@ -4965,8 +5067,12 @@ typedef enum CXVisitorResult
(^CXCursorAndRangeVisitorBlock)(CXCursor, CXSourceRange);
CINDEX_LINKAGE
void clang_findReferencesInFileWithBlock(CXCursor, CXFile,
CXCursorAndRangeVisitorBlock);
CXResult clang_findReferencesInFileWithBlock(CXCursor, CXFile,
CXCursorAndRangeVisitorBlock);
CINDEX_LINKAGE
CXResult clang_findIncludesInFileWithBlock(CXTranslationUnit, CXFile,
CXCursorAndRangeVisitorBlock);
# endif
#endif
@ -5144,6 +5250,10 @@ typedef struct {
CXIdxLoc classLoc;
} CXIdxIBOutletCollectionAttrInfo;
typedef enum {
CXIdxDeclFlag_Skipped = 0x1
} CXIdxDeclInfoFlags;
typedef struct {
const CXIdxEntityInfo *entityInfo;
CXCursor cursor;
@ -5165,6 +5275,9 @@ typedef struct {
int isImplicit;
const CXIdxAttrInfo *const *attributes;
unsigned numAttributes;
unsigned flags;
} CXIdxDeclInfo;
typedef enum {
@ -5372,16 +5485,14 @@ CINDEX_LINKAGE void
clang_index_setClientEntity(const CXIdxEntityInfo *, CXIdxClientEntity);
/**
* \brief An indexing action, to be applied to one or multiple translation units
* but not on concurrent threads. If there are threads doing indexing
* concurrently, they should use different CXIndexAction objects.
* \brief An indexing action/session, to be applied to one or multiple
* translation units.
*/
typedef void *CXIndexAction;
/**
* \brief An indexing action, to be applied to one or multiple translation units
* but not on concurrent threads. If there are threads doing indexing
* concurrently, they should use different CXIndexAction objects.
* \brief An indexing action/session, to be applied to one or multiple
* translation units.
*
* \param CIdx The index object with which the index action will be associated.
*/
@ -5423,7 +5534,15 @@ typedef enum {
/**
* \brief Suppress all compiler warnings when parsing for indexing.
*/
CXIndexOpt_SuppressWarnings = 0x8
CXIndexOpt_SuppressWarnings = 0x8,
/**
* \brief Skip a function/method body that was already parsed during an
* indexing session associated with a \c CXIndexAction object.
* Bodies in system headers are always skipped.
*/
CXIndexOpt_SkipParsedBodiesInSession = 0x10
} CXIndexOptFlags;
/**

View File

@ -11,8 +11,8 @@
#define LLVM_CLANG_ARCMIGRATE_ARCMT_H
#include "clang/ARCMigrate/FileRemapper.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Frontend/CompilerInvocation.h"
namespace clang {
class ASTContext;

View File

@ -10,8 +10,8 @@
#ifndef LLVM_CLANG_ARCMIGRATE_ARCMT_ACTION_H
#define LLVM_CLANG_ARCMIGRATE_ARCMT_ACTION_H
#include "clang/Frontend/FrontendAction.h"
#include "clang/ARCMigrate/FileRemapper.h"
#include "clang/Frontend/FrontendAction.h"
#include "llvm/ADT/OwningPtr.h"
namespace clang {

View File

@ -11,9 +11,9 @@
#define LLVM_CLANG_ARCMIGRATE_FILEREMAPPER_H
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {

View File

@ -15,8 +15,8 @@
#define LLVM_CLANG_AST_APVALUE_H
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PointerUnion.h"

View File

@ -22,7 +22,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/Type.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/AST/Type.h"
#endif

View File

@ -17,9 +17,9 @@
namespace clang {
class ASTContext;
class CXXRecordDecl;
class Decl;
class DeclGroupRef;
class HandleTagDeclDefinition;
class PPMutationListener;
class ASTMutationListener;
class ASTDeserializationListener; // layering violation because void* is ugly
class SemaConsumer; // layering violation required for safe SemaConsumer
@ -112,11 +112,6 @@ class ASTConsumer {
/// it was actually used.
virtual void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) {}
/// \brief If the consumer is interested in preprocessor entities getting
/// modified after their initial creation, it should return a pointer to
/// a PPMutationListener here.
virtual PPMutationListener *GetPPMutationListener() { return 0; }
/// \brief If the consumer is interested in entities getting modified after
/// their initial creation, it should return a pointer to
/// an ASTMutationListener here.
@ -130,6 +125,14 @@ class ASTConsumer {
/// PrintStats - If desired, print any statistics.
virtual void PrintStats() {}
/// \brief This callback is called for each function if the Parser was
/// initialized with \c SkipFunctionBodies set to \c true.
///
/// \return \c true if the function's body should be skipped. The function
/// body may be parsed anyway if it is needed (for instance, if it contains
/// the code completion point or is constexpr).
virtual bool shouldSkipFunctionBody(Decl *D) { return true; }
};
} // end namespace clang.

View File

@ -15,21 +15,23 @@
#ifndef LLVM_CLANG_AST_ASTCONTEXT_H
#define LLVM_CLANG_AST_ASTCONTEXT_H
#include "clang/AST/ASTTypeTraits.h"
#include "clang/AST/CanonicalType.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/Decl.h"
#include "clang/AST/LambdaMangleContext.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/Basic/AddressSpaces.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/VersionTuple.h"
#include "clang/AST/Decl.h"
#include "clang/AST/LambdaMangleContext.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/CanonicalType.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/CommentCommandTraits.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
@ -57,28 +59,12 @@ namespace clang {
class TargetInfo;
class CXXABI;
// Decls
class DeclContext;
class CXXConversionDecl;
class CXXMethodDecl;
class CXXRecordDecl;
class Decl;
class FieldDecl;
class MangleContext;
class ObjCIvarDecl;
class ObjCIvarRefExpr;
class ObjCPropertyDecl;
class ParmVarDecl;
class RecordDecl;
class StoredDeclsMap;
class TagDecl;
class TemplateTemplateParmDecl;
class TemplateTypeParmDecl;
class TranslationUnitDecl;
class TypeDecl;
class TypedefNameDecl;
class UnresolvedSetIterator;
class UsingDecl;
class UsingShadowDecl;
class UnresolvedSetIterator;
namespace Builtin { class Context; }
@ -91,7 +77,7 @@ namespace clang {
class ASTContext : public RefCountedBase<ASTContext> {
ASTContext &this_() { return *this; }
mutable std::vector<Type*> Types;
mutable SmallVector<Type *, 0> Types;
mutable llvm::FoldingSet<ExtQuals> ExtQualNodes;
mutable llvm::FoldingSet<ComplexType> ComplexTypes;
mutable llvm::FoldingSet<PointerType> PointerTypes;
@ -233,6 +219,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
QualType ObjCConstantStringType;
mutable RecordDecl *CFConstantStringTypeDecl;
mutable QualType ObjCSuperType;
QualType ObjCNSStringType;
/// \brief The typedef declaration for the Objective-C "instancetype" type.
@ -343,7 +331,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// \brief Mapping from each declaration context to its corresponding lambda
/// mangling context.
llvm::DenseMap<const DeclContext *, LambdaMangleContext> LambdaMangleContexts;
llvm::DenseMap<const DeclContext *, unsigned> UnnamedMangleContexts;
llvm::DenseMap<const TagDecl *, unsigned> UnnamedMangleNumbers;
/// \brief Mapping that stores parameterIndex values for ParmVarDecls when
/// that value exceeds the bitfield size of ParmVarDeclBits.ParameterIndex.
typedef llvm::DenseMap<const VarDecl *, unsigned> ParameterIndexTable;
@ -393,6 +384,58 @@ class ASTContext : public RefCountedBase<ASTContext> {
OwningPtr<ExternalASTSource> ExternalSource;
ASTMutationListener *Listener;
/// \brief Contains parents of a node.
typedef llvm::SmallVector<ast_type_traits::DynTypedNode, 1> ParentVector;
/// \brief Maps from a node to its parents.
typedef llvm::DenseMap<const void *, ParentVector> ParentMap;
/// \brief Returns the parents of the given node.
///
/// Note that this will lazily compute the parents of all nodes
/// and store them for later retrieval. Thus, the first call is O(n)
/// in the number of AST nodes.
///
/// Caveats and FIXMEs:
/// Calculating the parent map over all AST nodes will need to load the
/// full AST. This can be undesirable in the case where the full AST is
/// expensive to create (for example, when using precompiled header
/// preambles). Thus, there are good opportunities for optimization here.
/// One idea is to walk the given node downwards, looking for references
/// to declaration contexts - once a declaration context is found, compute
/// the parent map for the declaration context; if that can satisfy the
/// request, loading the whole AST can be avoided. Note that this is made
/// more complex by statements in templates having multiple parents - those
/// problems can be solved by building closure over the templated parts of
/// the AST, which also avoids touching large parts of the AST.
/// Additionally, we will want to add an interface to already give a hint
/// where to search for the parents, for example when looking at a statement
/// inside a certain function.
///
/// 'NodeT' can be one of Decl, Stmt, Type, TypeLoc,
/// NestedNameSpecifier or NestedNameSpecifierLoc.
template <typename NodeT>
ParentVector getParents(const NodeT &Node) {
return getParents(ast_type_traits::DynTypedNode::create(Node));
}
ParentVector getParents(const ast_type_traits::DynTypedNode &Node) {
assert(Node.getMemoizationData() &&
"Invariant broken: only nodes that support memoization may be "
"used in the parent map.");
if (!AllParents) {
// We always need to run over the whole translation unit, as
// hasAncestor can escape any subtree.
AllParents.reset(
ParentMapASTVisitor::buildMap(*getTranslationUnitDecl()));
}
ParentMap::const_iterator I = AllParents->find(Node.getMemoizationData());
if (I == AllParents->end()) {
return ParentVector();
}
return I->second;
}
const clang::PrintingPolicy &getPrintingPolicy() const {
return PrintingPolicy;
}
@ -713,6 +756,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
CanQualType PseudoObjectTy, ARCUnbridgedCastTy;
CanQualType ObjCBuiltinIdTy, ObjCBuiltinClassTy, ObjCBuiltinSelTy;
CanQualType ObjCBuiltinBoolTy;
CanQualType OCLImage1dTy, OCLImage1dArrayTy, OCLImage1dBufferTy;
CanQualType OCLImage2dTy, OCLImage2dArrayTy;
CanQualType OCLImage3dTy;
CanQualType OCLSamplerTy, OCLEventTy;
// Types for deductions in C++0x [stmt.ranged]'s desugaring. Built on demand.
mutable QualType AutoDeductTy; // Deduction against 'auto'.
@ -755,7 +802,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
ASTMutationListener *getASTMutationListener() const { return Listener; }
void PrintStats() const;
const std::vector<Type*>& getTypes() const { return Types; }
const SmallVectorImpl<Type *>& getTypes() const { return Types; }
/// \brief Retrieve the declaration for the 128-bit signed integer type.
TypedefDecl *getInt128Decl() const;
@ -857,12 +904,17 @@ class ASTContext : public RefCountedBase<ASTContext> {
return cudaConfigureCallDecl;
}
/// Builds the struct used for __block variables.
QualType BuildByRefType(StringRef DeclName, QualType Ty) const;
/// Returns true iff we need copy/dispose helpers for the given type.
bool BlockRequiresCopying(QualType Ty) const;
bool BlockRequiresCopying(QualType Ty, const VarDecl *D);
/// Returns true if the given type has a known lifetime. HasByrefExtendedLayout is set
/// to false in this case. If HasByrefExtendedLayout returns true, byref variable
/// has extended lifetime.
bool getByrefLifetime(QualType Ty,
Qualifiers::ObjCLifetime &Lifetime,
bool &HasByrefExtendedLayout) const;
/// \brief Return the uniqued reference to the type for an lvalue reference
/// to the specified type.
QualType getLValueReferenceType(QualType T, bool SpelledAsLValue = true)
@ -941,8 +993,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
}
/// \brief Return a normal function type with a typed argument list.
QualType getFunctionType(QualType ResultTy,
const QualType *Args, unsigned NumArgs,
QualType getFunctionType(QualType ResultTy, ArrayRef<QualType> Args,
const FunctionProtoType::ExtProtoInfo &EPI) const;
/// \brief Return the unique reference to the type for the specified type
@ -1025,7 +1076,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
const TemplateArgument *Args) const;
QualType getPackExpansionType(QualType Pattern,
llvm::Optional<unsigned> NumExpansions);
Optional<unsigned> NumExpansions);
QualType getObjCInterfaceType(const ObjCInterfaceDecl *Decl,
ObjCInterfaceDecl *PrevDecl = 0) const;
@ -1094,6 +1145,14 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// defined in <stddef.h> as defined by the target.
QualType getWIntType() const { return WIntTy; }
/// \brief Return a type compatible with "intptr_t" (C99 7.18.1.4),
/// as defined by the target.
QualType getIntPtrType() const;
/// \brief Return a type compatible with "uintptr_t" (C99 7.18.1.4),
/// as defined by the target.
QualType getUIntPtrType() const;
/// \brief Return the unique type for "ptrdiff_t" (C99 7.17) defined in
/// <stddef.h>. Pointer - pointer requires this (C99 6.5.6p9).
QualType getPointerDiffType() const;
@ -1104,7 +1163,11 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// \brief Return the C structure type used to represent constant CFStrings.
QualType getCFConstantStringType() const;
/// \brief Returns the C struct type for objc_super
QualType getObjCSuperType() const;
void setObjCSuperType(QualType ST) { ObjCSuperType = ST; }
/// Get the structure type used to represent CFStrings, or NULL
/// if it hasn't yet been built.
QualType getRawCFConstantStringType() const {
@ -1545,14 +1608,27 @@ class ASTContext : public RefCountedBase<ASTContext> {
const ASTRecordLayout &
getASTObjCImplementationLayout(const ObjCImplementationDecl *D) const;
/// \brief Get the key function for the given record decl, or NULL if there
/// isn't one.
/// \brief Get our current best idea for the key function of the
/// given record decl, or NULL if there isn't one.
///
/// The key function is, according to the Itanium C++ ABI section 5.2.3:
/// ...the first non-pure virtual function that is not inline at the
/// point of class definition.
///
/// ...the first non-pure virtual function that is not inline at the point
/// of class definition.
const CXXMethodDecl *getKeyFunction(const CXXRecordDecl *RD);
/// Other ABIs use the same idea. However, the ARM C++ ABI ignores
/// virtual functions that are defined 'inline', which means that
/// the result of this computation can change.
const CXXMethodDecl *getCurrentKeyFunction(const CXXRecordDecl *RD);
/// \brief Observe that the given method cannot be a key function.
/// Checks the key-function cache for the method's class and clears it
/// if it matches the given declaration.
///
/// This is used in ABIs where out-of-line definitions marked
/// inline are not considered to be key functions.
///
/// \param method should be the declaration from the class definition
void setNonKeyFunction(const CXXMethodDecl *method);
/// Get the offset of a FieldDecl or IndirectFieldDecl, in bits.
uint64_t getFieldOffset(const ValueDecl *FD) const;
@ -1885,8 +1961,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
// Type Iterators.
//===--------------------------------------------------------------------===//
typedef std::vector<Type*>::iterator type_iterator;
typedef std::vector<Type*>::const_iterator const_type_iterator;
typedef SmallVectorImpl<Type *>::iterator type_iterator;
typedef SmallVectorImpl<Type *>::const_iterator const_type_iterator;
type_iterator types_begin() { return Types.begin(); }
type_iterator types_end() { return Types.end(); }
@ -1943,7 +2019,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// \brief Returns the Objective-C interface that \p ND belongs to if it is
/// an Objective-C method/property/ivar etc. that is part of an interface,
/// otherwise returns null.
ObjCInterfaceDecl *getObjContainingInterface(NamedDecl *ND) const;
const ObjCInterfaceDecl *getObjContainingInterface(const NamedDecl *ND) const;
/// \brief Set the copy initialization expression of a block var decl.
void setBlockVarCopyInits(VarDecl*VD, Expr* Init);
@ -1993,6 +2069,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// it is not used.
bool DeclMustBeEmitted(const Decl *D);
void addUnnamedTag(const TagDecl *Tag);
int getUnnamedTagManglingNumber(const TagDecl *Tag) const;
/// \brief Retrieve the lambda mangling number for a lambda expression.
unsigned getLambdaManglingNumber(CXXMethodDecl *CallOperator);
@ -2077,7 +2156,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
bool EncodingProperty = false,
bool StructField = false,
bool EncodeBlockParameters = false,
bool EncodeClassNames = false) const;
bool EncodeClassNames = false,
bool EncodePointerToObjCTypedef = false) const;
// Adds the encoding of the structure's members.
void getObjCEncodingForStructureImpl(RecordDecl *RD, std::string &S,
@ -2109,8 +2189,81 @@ class ASTContext : public RefCountedBase<ASTContext> {
friend class DeclContext;
friend class DeclarationNameTable;
void ReleaseDeclContextMaps();
/// \brief A \c RecursiveASTVisitor that records, for every node it traverses,
/// the node that was being traversed when it was reached.
///
/// "Parent" here means parent in the \c RecursiveASTVisitor traversal order
/// only. Other relationships in the AST (for example declaration context) are
/// not captured and are better modeled by special matchers.
///
/// FIXME: Currently only builds up the map using \c Stmt and \c Decl nodes.
class ParentMapASTVisitor : public RecursiveASTVisitor<ParentMapASTVisitor> {
public:
  /// \brief Traverses \p TU and returns the resulting parent map.
  ///
  /// The map is heap-allocated here; the caller takes ownership of the
  /// returned \c ParentMap.
  static ParentMap *buildMap(TranslationUnitDecl &TU) {
    ParentMapASTVisitor Builder(new ParentMap);
    Builder.TraverseDecl(&TU);
    return Builder.Parents;
  }

private:
  typedef RecursiveASTVisitor<ParentMapASTVisitor> VisitorBase;

  ParentMapASTVisitor(ParentMap *Parents) : Parents(Parents) {}

  // Visitor configuration hooks: also walk template instantiations and
  // implicit code.
  bool shouldVisitTemplateInstantiations() const { return true; }
  bool shouldVisitImplicitCode() const { return true; }

  // Disables data recursion. We intercept Traverse* methods in the RAV, which
  // are not triggered during data recursion.
  bool shouldUseDataRecursionFor(clang::Stmt *S) const { return false; }

  /// \brief Common implementation behind the Traverse* overrides.
  ///
  /// Records the node on top of \c ParentStack (if any) as a parent of
  /// \p Node, then keeps \p Node on the stack while \p traverse, the base
  /// class traversal, runs over it. Null nodes are skipped and reported as
  /// successfully traversed.
  template <typename T>
  bool TraverseNode(T *Node, bool (VisitorBase::*traverse)(T *)) {
    if (!Node)
      return true;

    if (!ParentStack.empty()) {
      // FIXME: Currently we add the same parent multiple times, for example
      // when we visit all subexpressions of template instantiations; this is
      // suboptimal, but benign: the only way to visit those is with
      // hasAncestor / hasParent, and those do not create new matches.
      // The plan is to enable DynTypedNode to be storable in a map or hash
      // map. The main problem there is to implement hash functions /
      // comparison operators for all types that DynTypedNode supports that
      // do not have pointer identity.
      (*Parents)[Node].push_back(ParentStack.back());
    }

    ParentStack.push_back(ast_type_traits::DynTypedNode::create(*Node));
    const bool Continue = (this->*traverse)(Node);
    ParentStack.pop_back();
    return Continue;
  }

  bool TraverseDecl(Decl *DeclNode) {
    return TraverseNode(DeclNode, &VisitorBase::TraverseDecl);
  }

  bool TraverseStmt(Stmt *StmtNode) {
    return TraverseNode(StmtNode, &VisitorBase::TraverseStmt);
  }

  /// The map being filled in; handed to the caller by buildMap().
  ParentMap *Parents;
  /// The nodes currently being traversed, innermost last.
  llvm::SmallVector<ast_type_traits::DynTypedNode, 16> ParentStack;

  friend class RecursiveASTVisitor<ParentMapASTVisitor>;
};
llvm::OwningPtr<ParentMap> AllParents;
};
/// \brief Utility function for constructing a nullary selector.
static inline Selector GetNullarySelector(StringRef name, ASTContext& Ctx) {
IdentifierInfo* II = &Ctx.Idents.get(name);
@ -2132,8 +2285,8 @@ static inline Selector GetUnarySelector(StringRef name, ASTContext& Ctx) {
/// This placement form of operator new uses the ASTContext's allocator for
/// obtaining memory.
///
/// IMPORTANT: These are also declared in clang/AST/Attr.h! Any changes here
/// need to also be made there.
/// IMPORTANT: These are also declared in clang/AST/AttrIterator.h! Any changes
/// here need to also be made there.
///
/// We intentionally avoid using a nothrow specification here so that the calls
/// to this operator will not perform a null check on the result -- the

View File

@ -48,6 +48,9 @@ namespace clang {
/// \brief Whether to perform a minimal import.
bool Minimal;
/// \brief Whether the last diagnostic came from the "from" context.
bool LastDiagFromFrom;
/// \brief Mapping from the already-imported types in the "from" context
/// to the corresponding types in the "to" context.

View File

@ -16,18 +16,19 @@
#include "clang/Basic/SourceLocation.h"
namespace clang {
class Decl;
class DeclContext;
class TagDecl;
class CXXRecordDecl;
class ClassTemplateDecl;
class ClassTemplateSpecializationDecl;
class Decl;
class DeclContext;
class FunctionDecl;
class FunctionTemplateDecl;
class ObjCCategoryDecl;
class ObjCInterfaceDecl;
class ObjCContainerDecl;
class ObjCInterfaceDecl;
class ObjCPropertyDecl;
class TagDecl;
class VarDecl;
/// \brief An abstract interface that should be implemented by listeners
/// that want to be notified when an AST entity gets modified after its

View File

@ -1,4 +1,4 @@
//===--- ASTMatchersTypeTraits.h --------------------------------*- C++ -*-===//
//===--- ASTTypeTraits.h ----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -12,11 +12,12 @@
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_MATCHERS_AST_TYPE_TRAITS_H
#define LLVM_CLANG_AST_MATCHERS_AST_TYPE_TRAITS_H
#ifndef LLVM_CLANG_AST_AST_TYPE_TRAITS_H
#define LLVM_CLANG_AST_AST_TYPE_TRAITS_H
#include "clang/AST/Decl.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TypeLoc.h"
#include "llvm/Support/AlignOf.h"
namespace clang {
@ -87,8 +88,9 @@ class DynTypedNode {
/// guaranteed to be unique pointers pointing to dedicated storage in the
/// AST. \c QualTypes on the other hand do not have storage or unique
/// pointers and thus need to be stored by value.
llvm::AlignedCharArrayUnion<Decl*, QualType, TypeLoc, NestedNameSpecifierLoc>
Storage;
llvm::AlignedCharArrayUnion<Decl *, Stmt *, NestedNameSpecifier,
NestedNameSpecifierLoc, QualType, Type,
TypeLoc> Storage;
};
// FIXME: Pull out abstraction for the following.
@ -206,4 +208,4 @@ inline const void *DynTypedNode::getMemoizationData() const {
} // end namespace ast_type_traits
} // end namespace clang
#endif // LLVM_CLANG_AST_MATCHERS_AST_TYPE_TRAITS_H
#endif // LLVM_CLANG_AST_AST_TYPE_TRAITS_H

View File

@ -0,0 +1,86 @@
//===-- ASTUnresolvedSet.h - Unresolved sets of declarations ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides an UnresolvedSet-like class, whose contents are
// allocated using the allocator associated with an ASTContext.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_ASTUNRESOLVEDSET_H
#define LLVM_CLANG_AST_ASTUNRESOLVEDSET_H
#include "clang/AST/ASTVector.h"
#include "clang/AST/UnresolvedSet.h"
namespace clang {
/// \brief An UnresolvedSet-like class which uses the ASTContext's allocator.
class ASTUnresolvedSet {
typedef ASTVector<DeclAccessPair> DeclsTy;
DeclsTy Decls;
ASTUnresolvedSet(const ASTUnresolvedSet &) LLVM_DELETED_FUNCTION;
void operator=(const ASTUnresolvedSet &) LLVM_DELETED_FUNCTION;
public:
ASTUnresolvedSet() {}
ASTUnresolvedSet(ASTContext &C, unsigned N) : Decls(C, N) {}
typedef UnresolvedSetIterator iterator;
typedef UnresolvedSetIterator const_iterator;
iterator begin() { return iterator(Decls.begin()); }
iterator end() { return iterator(Decls.end()); }
const_iterator begin() const { return const_iterator(Decls.begin()); }
const_iterator end() const { return const_iterator(Decls.end()); }
void addDecl(ASTContext &C, NamedDecl *D) {
addDecl(C, D, AS_none);
}
void addDecl(ASTContext &C, NamedDecl *D, AccessSpecifier AS) {
Decls.push_back(DeclAccessPair::make(D, AS), C);
}
/// Replaces the given declaration with the new one, once.
///
/// \return true if the set changed
bool replace(const NamedDecl* Old, NamedDecl *New) {
for (DeclsTy::iterator I = Decls.begin(), E = Decls.end(); I != E; ++I)
if (I->getDecl() == Old)
return (I->setDecl(New), true);
return false;
}
void erase(unsigned I) {
Decls[I] = Decls.back();
Decls.pop_back();
}
void clear() { Decls.clear(); }
bool empty() const { return Decls.empty(); }
unsigned size() const { return Decls.size(); }
void reserve(ASTContext &C, unsigned N) {
Decls.reserve(C, N);
}
void append(ASTContext &C, iterator I, iterator E) {
Decls.append(C, I.ir, E.ir);
}
DeclAccessPair &operator[](unsigned I) { return Decls[I]; }
const DeclAccessPair &operator[](unsigned I) const { return Decls[I]; }
};
} // namespace clang
#endif

View File

@ -18,12 +18,13 @@
#ifndef LLVM_CLANG_AST_VECTOR
#define LLVM_CLANG_AST_VECTOR
#include "llvm/Support/type_traits.h"
#include "llvm/Support/Allocator.h"
#include "clang/AST/AttrIterator.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <memory>
#include <cstring>
#include <memory>
#ifdef _MSC_VER
namespace std {
@ -50,6 +51,7 @@ namespace std {
#endif
namespace clang {
class ASTContext;
template<typename T>
class ASTVector {
@ -59,7 +61,9 @@ class ASTVector {
public:
// Default ctor - Initialize to empty.
explicit ASTVector(ASTContext &C, unsigned N = 0)
ASTVector() : Begin(NULL), End(NULL), Capacity(NULL) { }
ASTVector(ASTContext &C, unsigned N)
: Begin(NULL), End(NULL), Capacity(NULL) {
reserve(C, N);
}

View File

@ -14,9 +14,10 @@
#ifndef LLVM_CLANG_AST_ATTR_H
#define LLVM_CLANG_AST_ATTR_H
#include "clang/Basic/LLVM.h"
#include "clang/Basic/AttrKinds.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/Type.h"
#include "clang/Basic/AttrKinds.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/VersionTuple.h"
#include "llvm/ADT/SmallVector.h"
@ -26,7 +27,6 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstring>
#include <algorithm>
namespace clang {
class ASTContext;
@ -36,23 +36,6 @@ namespace clang {
class QualType;
class FunctionDecl;
class TypeSourceInfo;
}
// Defined in ASTContext.h
void *operator new(size_t Bytes, const clang::ASTContext &C,
size_t Alignment = 16);
// FIXME: Being forced to not have a default argument here due to redeclaration
// rules on default arguments sucks
void *operator new[](size_t Bytes, const clang::ASTContext &C,
size_t Alignment);
// It is good practice to pair new/delete operators. Also, MSVC gives many
// warnings if a matching delete overload is not declared, even though the
// throw() spec guarantees it will not be implicitly called.
void operator delete(void *Ptr, const clang::ASTContext &C, size_t);
void operator delete[](void *Ptr, const clang::ASTContext &C, size_t);
namespace clang {
/// Attr - This represents one attribute.
class Attr {
@ -61,10 +44,16 @@ class Attr {
unsigned AttrKind : 16;
protected:
/// An index into the spelling list of an
/// attribute defined in Attr.td file.
unsigned SpellingListIndex : 4;
bool Inherited : 1;
bool IsPackExpansion : 1;
virtual ~Attr();
void* operator new(size_t bytes) throw() {
llvm_unreachable("Attrs cannot be allocated with regular 'new'.");
}
@ -84,14 +73,17 @@ class Attr {
}
protected:
Attr(attr::Kind AK, SourceRange R)
: Range(R), AttrKind(AK), Inherited(false) {}
Attr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex = 0)
: Range(R), AttrKind(AK), SpellingListIndex(SpellingListIndex),
Inherited(false), IsPackExpansion(false) {}
public:
attr::Kind getKind() const {
return static_cast<attr::Kind>(AttrKind);
}
unsigned getSpellingListIndex() const { return SpellingListIndex; }
SourceLocation getLocation() const { return Range.getBegin(); }
SourceRange getRange() const { return Range; }
@ -99,21 +91,24 @@ class Attr {
bool isInherited() const { return Inherited; }
void setPackExpansion(bool PE) { IsPackExpansion = PE; }
bool isPackExpansion() const { return IsPackExpansion; }
// Clone this attribute.
virtual Attr* clone(ASTContext &C) const = 0;
virtual Attr *clone(ASTContext &C) const = 0;
virtual bool isLateParsed() const { return false; }
// Pretty print this attribute.
virtual void printPretty(llvm::raw_ostream &OS,
virtual void printPretty(raw_ostream &OS,
const PrintingPolicy &Policy) const = 0;
};
class InheritableAttr : public Attr {
virtual void anchor();
protected:
InheritableAttr(attr::Kind AK, SourceRange R)
: Attr(AK, R) {}
InheritableAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex = 0)
: Attr(AK, R, SpellingListIndex) {}
public:
void setInherited(bool I) { Inherited = I; }
@ -127,126 +122,36 @@ class InheritableAttr : public Attr {
class InheritableParamAttr : public InheritableAttr {
virtual void anchor();
protected:
InheritableParamAttr(attr::Kind AK, SourceRange R)
: InheritableAttr(AK, R) {}
InheritableParamAttr(attr::Kind AK, SourceRange R,
unsigned SpellingListIndex = 0)
: InheritableAttr(AK, R, SpellingListIndex) {}
public:
// Implement isa/cast/dyncast/etc.
static bool classof(const Attr *A) {
// Relies on relative order of enum emission with respect to MS inheritance
// attrs.
return A->getKind() <= attr::LAST_INHERITABLE_PARAM;
}
};
class MSInheritanceAttr : public InheritableAttr {
virtual void anchor();
protected:
MSInheritanceAttr(attr::Kind AK, SourceRange R, unsigned SpellingListIndex = 0)
: InheritableAttr(AK, R, SpellingListIndex) {}
public:
// Implement isa/cast/dyncast/etc.
static bool classof(const Attr *A) {
// Relies on relative order of enum emission with respect to param attrs.
return (A->getKind() <= attr::LAST_MS_INHERITABLE &&
A->getKind() > attr::LAST_INHERITABLE_PARAM);
}
};
#include "clang/AST/Attrs.inc"
/// AttrVec - A vector of Attr, which is how they are stored on the AST.
typedef SmallVector<Attr*, 2> AttrVec;
typedef SmallVector<const Attr*, 2> ConstAttrVec;
/// specific_attr_iterator - Iterates over a subrange of an AttrVec, only
/// providing attributes that are of a specifc type.
template <typename SpecificAttr, typename Container = AttrVec>
class specific_attr_iterator {
typedef typename Container::const_iterator Iterator;
/// Current - The current, underlying iterator.
/// In order to ensure we don't dereference an invalid iterator unless
/// specifically requested, we don't necessarily advance this all the
/// way. Instead, we advance it when an operation is requested; if the
/// operation is acting on what should be a past-the-end iterator,
/// then we offer no guarantees, but this way we do not dererence a
/// past-the-end iterator when we move to a past-the-end position.
mutable Iterator Current;
void AdvanceToNext() const {
while (!isa<SpecificAttr>(*Current))
++Current;
}
void AdvanceToNext(Iterator I) const {
while (Current != I && !isa<SpecificAttr>(*Current))
++Current;
}
public:
typedef SpecificAttr* value_type;
typedef SpecificAttr* reference;
typedef SpecificAttr* pointer;
typedef std::forward_iterator_tag iterator_category;
typedef std::ptrdiff_t difference_type;
specific_attr_iterator() : Current() { }
explicit specific_attr_iterator(Iterator i) : Current(i) { }
reference operator*() const {
AdvanceToNext();
return cast<SpecificAttr>(*Current);
}
pointer operator->() const {
AdvanceToNext();
return cast<SpecificAttr>(*Current);
}
specific_attr_iterator& operator++() {
++Current;
return *this;
}
specific_attr_iterator operator++(int) {
specific_attr_iterator Tmp(*this);
++(*this);
return Tmp;
}
friend bool operator==(specific_attr_iterator Left,
specific_attr_iterator Right) {
if (Left.Current < Right.Current)
Left.AdvanceToNext(Right.Current);
else
Right.AdvanceToNext(Left.Current);
return Left.Current == Right.Current;
}
friend bool operator!=(specific_attr_iterator Left,
specific_attr_iterator Right) {
return !(Left == Right);
}
};
template <typename SpecificAttr, typename Container>
inline specific_attr_iterator<SpecificAttr, Container>
specific_attr_begin(const Container& container) {
return specific_attr_iterator<SpecificAttr, Container>(container.begin());
}
template <typename SpecificAttr, typename Container>
inline specific_attr_iterator<SpecificAttr, Container>
specific_attr_end(const Container& container) {
return specific_attr_iterator<SpecificAttr, Container>(container.end());
}
template <typename SpecificAttr, typename Container>
inline bool hasSpecificAttr(const Container& container) {
return specific_attr_begin<SpecificAttr>(container) !=
specific_attr_end<SpecificAttr>(container);
}
template <typename SpecificAttr, typename Container>
inline SpecificAttr *getSpecificAttr(const Container& container) {
specific_attr_iterator<SpecificAttr, Container> i =
specific_attr_begin<SpecificAttr>(container);
if (i != specific_attr_end<SpecificAttr>(container))
return *i;
else
return 0;
}
/// getMaxAlignment - Returns the highest alignment value found among
/// AlignedAttrs in an AttrVec, or 0 if there are none.
inline unsigned getMaxAttrAlignment(const AttrVec& V, ASTContext &Ctx) {
unsigned Align = 0;
specific_attr_iterator<AlignedAttr> i(V.begin()), e(V.end());
for(; i != e; ++i)
Align = std::max(Align, i->getAlignment(Ctx));
return Align;
}
} // end namespace clang
#endif

View File

@ -0,0 +1,142 @@
//===--- AttrIterator.h - Classes for attribute iteration -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the Attr vector and specific_attr_iterator interfaces.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_ATTRITERATOR_H
#define LLVM_CLANG_AST_ATTRITERATOR_H
#include "clang/Basic/LLVM.h"
#include <iterator>
namespace clang {
class ASTContext;
class Attr;
}
// Defined in ASTContext.h
void *operator new(size_t Bytes, const clang::ASTContext &C,
size_t Alignment = 16);
// FIXME: Being forced to not have a default argument here due to redeclaration
// rules on default arguments sucks
void *operator new[](size_t Bytes, const clang::ASTContext &C,
size_t Alignment);
// It is good practice to pair new/delete operators. Also, MSVC gives many
// warnings if a matching delete overload is not declared, even though the
// throw() spec guarantees it will not be implicitly called.
void operator delete(void *Ptr, const clang::ASTContext &C, size_t);
void operator delete[](void *Ptr, const clang::ASTContext &C, size_t);
namespace clang {
/// AttrVec - A vector of Attr, which is how they are stored on the AST.
typedef SmallVector<Attr*, 2> AttrVec;
typedef SmallVector<const Attr*, 2> ConstAttrVec;
/// specific_attr_iterator - Iterates over a subrange of an AttrVec, only
/// providing attributes that are of a specific type.
///
/// \tparam SpecificAttr the attribute class to yield; elements for which
/// isa<SpecificAttr> is false are skipped.
/// \tparam Container the container type whose const_iterator is wrapped.
template <typename SpecificAttr, typename Container = AttrVec>
class specific_attr_iterator {
typedef typename Container::const_iterator Iterator;
/// Current - The current, underlying iterator.
/// In order to ensure we don't dereference an invalid iterator unless
/// specifically requested, we don't necessarily advance this all the
/// way. Instead, we advance it when an operation is requested; if the
/// operation is acting on what should be a past-the-end iterator,
/// then we offer no guarantees, but this way we do not dereference a
/// past-the-end iterator when we move to a past-the-end position.
mutable Iterator Current;
/// Advance Current until it refers to a SpecificAttr. There is no end
/// bound here, so this is only safe when a matching element exists at or
/// after Current.
void AdvanceToNext() const {
while (!isa<SpecificAttr>(*Current))
++Current;
}
/// Advance Current until it refers to a SpecificAttr or reaches \p I,
/// whichever comes first. The element at \p I itself is not inspected.
void AdvanceToNext(Iterator I) const {
while (Current != I && !isa<SpecificAttr>(*Current))
++Current;
}
public:
typedef SpecificAttr* value_type;
typedef SpecificAttr* reference;
typedef SpecificAttr* pointer;
typedef std::forward_iterator_tag iterator_category;
typedef std::ptrdiff_t difference_type;
specific_attr_iterator() : Current() { }
explicit specific_attr_iterator(Iterator i) : Current(i) { }
/// Skips forward to the next SpecificAttr (unbounded, see AdvanceToNext())
/// and returns it.
reference operator*() const {
AdvanceToNext();
return cast<SpecificAttr>(*Current);
}
/// Same as operator*(): skips forward, then returns the matching attribute.
pointer operator->() const {
AdvanceToNext();
return cast<SpecificAttr>(*Current);
}
/// Steps the underlying iterator by exactly one element. Skipping over
/// non-matching elements is deferred until dereference or comparison.
specific_attr_iterator& operator++() {
++Current;
return *this;
}
specific_attr_iterator operator++(int) {
specific_attr_iterator Tmp(*this);
++(*this);
return Tmp;
}
/// Compares two iterators after lazily advancing the one that is behind.
/// Both operands are taken by value, so the advancing done here happens on
/// copies and does not modify the caller's iterators.
friend bool operator==(specific_attr_iterator Left,
specific_attr_iterator Right) {
// Either both underlying iterators compare equal to 0 or neither does.
assert((Left.Current == 0) == (Right.Current == 0));
// Move whichever side is behind up to its next SpecificAttr, but never
// past the other side's position.
if (Left.Current < Right.Current)
Left.AdvanceToNext(Right.Current);
else
Right.AdvanceToNext(Left.Current);
return Left.Current == Right.Current;
}
friend bool operator!=(specific_attr_iterator Left,
specific_attr_iterator Right) {
return !(Left == Right);
}
};
/// Returns a specific_attr_iterator wrapping \p container's begin().
template <typename SpecificAttr, typename Container>
inline specific_attr_iterator<SpecificAttr, Container>
specific_attr_begin(const Container& container) {
  typedef specific_attr_iterator<SpecificAttr, Container> AttrIter;
  return AttrIter(container.begin());
}
/// Returns a specific_attr_iterator wrapping \p container's end().
template <typename SpecificAttr, typename Container>
inline specific_attr_iterator<SpecificAttr, Container>
specific_attr_end(const Container& container) {
  typedef specific_attr_iterator<SpecificAttr, Container> AttrIter;
  return AttrIter(container.end());
}
template <typename SpecificAttr, typename Container>
inline bool hasSpecificAttr(const Container& container) {
return specific_attr_begin<SpecificAttr>(container) !=
specific_attr_end<SpecificAttr>(container);
}
template <typename SpecificAttr, typename Container>
inline SpecificAttr *getSpecificAttr(const Container& container) {
specific_attr_iterator<SpecificAttr, Container> i =
specific_attr_begin<SpecificAttr>(container);
if (i != specific_attr_end<SpecificAttr>(container))
return *i;
else
return 0;
}
} // end namespace clang
#endif

Some files were not shown because too many files have changed in this diff Show More